From 7a4642f7f60349c2a59784f6837780780f99d5c8 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 09:15:48 +0100 Subject: [PATCH 01/53] feat (example/llm): Moved argument parser to separate function. --- src/brevitas_examples/llm/main.py | 311 +++++++++++++++--------------- 1 file changed, 157 insertions(+), 154 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 05d84f647..94f596529 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause """ +import sys import argparse import re @@ -40,157 +41,6 @@ from brevitas_examples.llm.llm_quant.run_utils import get_fx from brevitas_examples.llm.llm_quant.run_utils import modify_dataloader -parser = argparse.ArgumentParser() -parser.add_argument( - '--model', - type=str, - default="facebook/opt-125m", - help='HF model name. Default: facebook/opt-125m.') -parser.add_argument( - '--seed', type=int, default=0, help='Seed for sampling the calibration data. Default: 0.') -parser.add_argument( - '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') -parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') -parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') -parser.add_argument( - '--dataset', - type=str, - choices=['wikitext2', 'c4'], - default='wikitext2', - help='Dataset to use for quantization (default: %(default)s)') -parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') -parser.add_argument( - '--weight-param-method', - type=str, - default='stats', - choices=['stats', 'mse'], - help='How scales/zero-point are determined. Default: stats.') -parser.add_argument( - '--weight-scale-precision', - type=str, - default='float_scale', - choices=['float_scale', 'po2_scale'], - help='Whether scale is a float value or a po2. Default: po2.') -parser.add_argument( - '--weight-quant-type', - type=str, - default='sym', - choices=['sym', 'asym'], - help='Weight quantization type. Default: asym.') -parser.add_argument( - '--weight-quant-format', - type=quant_format_validator, - default='int', - help= - 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' -) -parser.add_argument( - '--weight-quant-granularity', - type=str, - default='per_group', - choices=['per_channel', 'per_tensor', 'per_group'], - help='Granularity for scales/zero-point of weights. Default: per_group.') -parser.add_argument( - '--weight-group-size', - type=int, - default=128, - help='Group size for per_group weight quantization. Default: 128.') -parser.add_argument( - '--quantize-weight-zero-point', action='store_true', help='Quantize weight zero-point.') -parser.add_argument( - '--input-bit-width', - type=int, - default=None, - help='Input bit width. Default: None (disables input quantization).') -parser.add_argument( - '--input-quant-format', - type=quant_format_validator, - default='int', - help= - 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' -) -parser.add_argument( - '--input-param-method', - type=str, - default='stats', - choices=['stats', 'mse'], - help= - 'How scales/zero-point are determined. 
Default: stats (percentile for static, absmax or minmax for dynamic).' -) -parser.add_argument( - '--input-scale-precision', - type=str, - default='float_scale', - choices=['float_scale', 'po2_scale'], - help='Whether input scale is a float value or a po2. Default: float.') -parser.add_argument( - '--input-scale-type', - type=str, - default='static', - choices=['static', 'dynamic', 'no_scale'], - help='Whether input scale is a static value or a dynamic value.') -parser.add_argument( - '--input-quant-type', - type=str, - default='asym', - choices=['sym', 'asym'], - help='Input quantization type. Default: asym.') -parser.add_argument( - '--input-quant-granularity', - type=str, - default='per_tensor', - choices=['per_tensor', 'per_row', 'per_group'], - help='Granularity for scales/zero-point of inputs. Default: per_tensor.') -parser.add_argument( - '--input-group-size', - type=int, - default=64, - help='Group size for per_group input quantization. Default: 64.') -parser.add_argument( - '--quantize-input-zero-point', action='store_true', help='Quantize input zero-point.') -parser.add_argument( - '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') -parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') -parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') -parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') -parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') -parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') -parser.add_argument( - '--no-float16', - action='store_true', - help='Disable float16 as base datatype and switch to float32.') -parser.add_argument( - '--replace-mha', - action='store_true', - help='Replace HuggingFace Attention with a quantizable version') -parser.add_argument( - '--weight-equalization', - action='store_true', - help='Apply weight equalization. Relevant to ReLU based models (e.g. OPT).') -parser.add_argument( - '--act-equalization', - default=None, - choices=[None, 'layerwise', 'fx'], - help='Apply activation equalization (SmoothQuant). Layerwise introduces standalone mul nodes,' - 'while fx merges them whenever possible into previous tensors, which is possible on ReLU based models (e.g. OPT).' -) -parser.add_argument('--load-awq', type=str, default=None, help="Load the awq search results.") -parser.add_argument( - '--export-target', - default=None, - choices=[ - None, - 'onnx_qcdq', - 'torch_qcdq', - 'sharded_torchmlir_group_weight', - 'sharded_packed_torchmlir_group_weight'], - help='Model export.') -parser.add_argument( - '--checkpoint-name', - type=str, - default=None, - help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") - def set_seed(seed): np.random.seed(seed) @@ -261,8 +111,7 @@ def validate(args): assert args.export_target != 'torch_qcdq', "Cannot export Torch QCDQ with FX" -def main(): - args = parser.parse_args() +def main(args): validate(args) set_seed(args.seed) @@ -448,5 +297,159 @@ def main(): model_export(model, calibration_loader[0], args) +def parse_args(args): + parser = argparse.ArgumentParser() + parser.add_argument( + '--model', + type=str, + default="facebook/opt-125m", + help='HF model name. Default: facebook/opt-125m.') + parser.add_argument( + '--seed', type=int, default=0, help='Seed for sampling the calibration data. 
Default: 0.') + parser.add_argument( + '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') + parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') + parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') + parser.add_argument( + '--dataset', + type=str, + choices=['wikitext2', 'c4'], + default='wikitext2', + help='Dataset to use for quantization (default: %(default)s)') + parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') + parser.add_argument( + '--weight-param-method', + type=str, + default='stats', + choices=['stats', 'mse'], + help='How scales/zero-point are determined. Default: stats.') + parser.add_argument( + '--weight-scale-precision', + type=str, + default='float_scale', + choices=['float_scale', 'po2_scale'], + help='Whether scale is a float value or a po2. Default: po2.') + parser.add_argument( + '--weight-quant-type', + type=str, + default='sym', + choices=['sym', 'asym'], + help='Weight quantization type. Default: asym.') + parser.add_argument( + '--weight-quant-format', + type=quant_format_validator, + default='int', + help= + 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' + ) + parser.add_argument( + '--weight-quant-granularity', + type=str, + default='per_group', + choices=['per_channel', 'per_tensor', 'per_group'], + help='Granularity for scales/zero-point of weights. Default: per_group.') + parser.add_argument( + '--weight-group-size', + type=int, + default=128, + help='Group size for per_group weight quantization. Default: 128.') + parser.add_argument( + '--quantize-weight-zero-point', action='store_true', help='Quantize weight zero-point.') + parser.add_argument( + '--input-bit-width', + type=int, + default=None, + help='Input bit width. Default: None (disables input quantization).') + parser.add_argument( + '--input-quant-format', + type=quant_format_validator, + default='int', + help= + 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' + ) + parser.add_argument( + '--input-param-method', + type=str, + default='stats', + choices=['stats', 'mse'], + help= + 'How scales/zero-point are determined. Default: stats (percentile for static, absmax or minmax for dynamic).' + ) + parser.add_argument( + '--input-scale-precision', + type=str, + default='float_scale', + choices=['float_scale', 'po2_scale'], + help='Whether input scale is a float value or a po2. Default: float.') + parser.add_argument( + '--input-scale-type', + type=str, + default='static', + choices=['static', 'dynamic', 'no_scale'], + help='Whether input scale is a static value or a dynamic value.') + parser.add_argument( + '--input-quant-type', + type=str, + default='asym', + choices=['sym', 'asym'], + help='Input quantization type. Default: asym.') + parser.add_argument( + '--input-quant-granularity', + type=str, + default='per_tensor', + choices=['per_tensor', 'per_row', 'per_group'], + help='Granularity for scales/zero-point of inputs. Default: per_tensor.') + parser.add_argument( + '--input-group-size', + type=int, + default=64, + help='Group size for per_group input quantization. 
Default: 64.') + parser.add_argument( + '--quantize-input-zero-point', action='store_true', help='Quantize input zero-point.') + parser.add_argument( + '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') + parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') + parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') + parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') + parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') + parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') + parser.add_argument( + '--no-float16', + action='store_true', + help='Disable float16 as base datatype and switch to float32.') + parser.add_argument( + '--replace-mha', + action='store_true', + help='Replace HuggingFace Attention with a quantizable version') + parser.add_argument( + '--weight-equalization', + action='store_true', + help='Apply weight equalization. Relevant to ReLU based models (e.g. OPT).') + parser.add_argument( + '--act-equalization', + default=None, + choices=[None, 'layerwise', 'fx'], + help='Apply activation equalization (SmoothQuant). Layerwise introduces standalone mul nodes,' + 'while fx merges them whenever possible into previous tensors, which is possible on ReLU based models (e.g. OPT).' + ) + parser.add_argument('--load-awq', type=str, default=None, help="Load the awq search results.") + parser.add_argument( + '--export-target', + default=None, + choices=[ + None, + 'onnx_qcdq', + 'torch_qcdq', + 'sharded_torchmlir_group_weight', + 'sharded_packed_torchmlir_group_weight'], + help='Model export.') + parser.add_argument( + '--checkpoint-name', + type=str, + default=None, + help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") + return parser.parse_args(args) + if __name__ == '__main__': - main() + args = parse_args(sys.argv[1:]) + main(args) From cd73b1409157c5f0c7f4025057b9ea480d47eafe Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:07:14 +0100 Subject: [PATCH 02/53] Refactor (example/llm): Refactor to prepare for automated tests. 
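
With this refactor the entry point can be driven from tests: main()
now takes pre-parsed arguments and returns the measured perplexities
together with the (possibly quantized) model. A minimal sketch of the
intended usage (the model name and flags are illustrative only):

    from brevitas_examples.llm.main import main, parse_args

    args = parse_args(["--model", "facebook/opt-125m", "--eval"])
    float_ppl, quant_ppl, model = main(args)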
--- src/brevitas_examples/llm/main.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 94f596529..78cf8ba49 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: BSD-3-Clause """ -import sys import argparse import re +import sys import numpy as np from optimum.amd.brevitas.accelerate_utils import offload_model @@ -130,6 +130,8 @@ def main(args): print("Model loaded.") model.eval() tokenizer = AutoTokenizer.from_pretrained(args.model) + float_ppl = None + quant_ppl = None if args.load_awq: from brevitas_examples.llm.llm_quant.awq.pre_quant import apply_awq @@ -174,10 +176,10 @@ def main(args): assert args.export_target != 'torch_qcdq', "TorchScript QCDQ export and Evaluation simultaneously" print("Float model eval...") model = offload_model(model) - ppl = compute_perplexity( + float_ppl = compute_perplexity( model, validation_loader, context_length=args.seqlen // 2, tokenizer=tokenizer) remove_hooks(model) - print(f"Float perplexity ({args.dataset}): {ppl}") + print(f"Float perplexity ({args.dataset}): {float_ppl}") if require_fx: model = get_fx(model) @@ -281,9 +283,9 @@ def main(args): if args.eval: print("Model eval...") - ppl = compute_perplexity( + quant_ppl = compute_perplexity( model, validation_loader, context_length=args.seqlen // 2, tokenizer=tokenizer) - print(f"Quantized perplexity ({args.dataset}): {ppl}") + print(f"Quantized perplexity ({args.dataset}): {quant_ppl}") remove_hooks(model) if args.checkpoint_name is not None: @@ -296,6 +298,8 @@ def main(args): model = model.to(dtype=torch.float32) model_export(model, calibration_loader[0], args) + return float_ppl, quant_ppl, model + def parse_args(args): parser = argparse.ArgumentParser() @@ -307,7 +311,10 @@ def parse_args(args): parser.add_argument( '--seed', type=int, default=0, help='Seed for sampling the calibration data. Default: 0.') parser.add_argument( - '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') + '--nsamples', + type=int, + default=128, + help='Number of calibration data samples. Default: 128.') parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') parser.add_argument( @@ -316,7 +323,8 @@ def parse_args(args): choices=['wikitext2', 'c4'], default='wikitext2', help='Dataset to use for quantization (default: %(default)s)') - parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') + parser.add_argument( + '--weight-bit-width', type=int, default=8, help='Weight bit width. 
Default: 8.') parser.add_argument( '--weight-param-method', type=str, @@ -409,7 +417,8 @@ def parse_args(args): parser.add_argument( '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') - parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') + parser.add_argument( + '--act-calibration', action='store_true', help='Apply activation calibration.') parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') @@ -450,6 +459,7 @@ def parse_args(args): help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") return parser.parse_args(args) + if __name__ == '__main__': args = parse_args(sys.argv[1:]) main(args) From dcd6a782c310797a46957ae264bc4cbc7509e77f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:09:44 +0100 Subject: [PATCH 03/53] test (example/llm): Added inital end-to-end example for LLM entry-point. --- tests/brevitas_examples/llm.py | 113 +++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/brevitas_examples/llm.py diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py new file mode 100644 index 000000000..5188141e3 --- /dev/null +++ b/tests/brevitas_examples/llm.py @@ -0,0 +1,113 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import dataclass +from argparse import Namespace +import logging +import shutil + +import pytest + +import numpy as np + +from brevitas_examples.llm.main import main +from brevitas_examples.llm.main import parse_args + + +def ptid2pathname(string): + return string.replace("/", "-").replace(":", "-") + + +def allclose(x, y): + return np.allclose(x, y, rtol=1e-02, atol=5e-01, equal_nan=False) + + +def allexact(x, y): + return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) + + +def requires_fx(args): + return args.act_equalization == "fx" or args.weight_equalization or args.ln_affine_merge + + +@dataclass +class ModelAndPpl: + name: str + float_ppl: float + quant_ppl: float + supports_fx: bool + + +class UpdatableNamespace(Namespace): + def update(self, **kwargs): + self.__dict__.update(**kwargs) + + +@pytest.fixture(scope="session", params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-LlamaForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-MistralForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=False, + ), +]) +def small_models_with_ppl(request): + yield request.param + + +@pytest.fixture() +def default_run_args(request): + args = UpdatableNamespace(**vars(parse_args([]))) + args.nsamples = 2 + args.seqlen = 2 + args.model = "hf-internal-testing/tiny-random-MistralForCausalLM" + args.dataset = "c4" + args.eval = True + #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash + args.weight_bit_width = 8 + args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". 
+ args.input_bit_width = 8 + args.act_calibration = True + return args + + +@pytest.fixture(params=[ + {}, + {"bias_corr": True}, + {"act_equalization": "layerwise"}, + {"act_equalization": "fx"}, + {"weight_equalization": True}, + {"gptq": True}, + {"ln_affine_merge": True}, + {"replace_mha": True}, + ]) +def toggle_run_args(default_run_args, request): + args = default_run_args + args.update(**request.param) + yield args + + +@pytest.mark.examples +@pytest.mark.weekly +def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): + caplog.set_level(logging.INFO) + args = toggle_run_args + args.model = small_models_with_ppl.name + exp_float_ppl = small_models_with_ppl.float_ppl + exp_quant_ppl = small_models_with_ppl.quant_ppl + use_fx = requires_fx(args) + if use_fx and not small_models_with_ppl.supports_fx: + pytest.xfail(f"{small_models_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) From 3ef28d4a6f0d26fed71d557876c17fecef2fa5d1 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:45:02 +0100 Subject: [PATCH 04/53] setup (requirements): added requirements file for LLM example for easier testing --- requirements/requirements-example-llm.txt | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 requirements/requirements-example-llm.txt diff --git a/requirements/requirements-example-llm.txt b/requirements/requirements-example-llm.txt new file mode 100644 index 000000000..a1a2c615f --- /dev/null +++ b/requirements/requirements-example-llm.txt @@ -0,0 +1 @@ +optimum-amd[brevitas] @ git+https://github.com/huggingface/optimum-amd.git@main diff --git a/setup.py b/setup.py index 10e920981..817c7f88e 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ def read_requirements(filename): "stt": read_requirements('requirements-stt.txt'), "vision": read_requirements('requirements-vision.txt'), "finn_integration": read_requirements('requirements-finn-integration.txt'), - "ort_integration": read_requirements('requirements-ort-integration.txt')}, + "ort_integration": read_requirements('requirements-ort-integration.txt'), + "example_llm": read_requirements('requirements-example-llm.txt')}, packages=find_packages('src'), package_dir={'': 'src'}, zip_safe=False, From 5aad69e37bc75deef150a0c6435f42981a3ff299 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 17:44:45 +0100 Subject: [PATCH 05/53] Fix: pre-commit --- src/brevitas_examples/llm/main.py | 7 +-- tests/brevitas_examples/llm.py | 74 +++++++++++++++++-------------- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 78cf8ba49..6c3c34dd5 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -349,7 +349,7 @@ def parse_args(args): default='int', help= 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' - ) + ) parser.add_argument( '--weight-quant-granularity', type=str, @@ -374,7 +374,7 @@ def parse_args(args): default='int', help= 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' 
- ) + ) parser.add_argument( '--input-param-method', type=str, @@ -456,7 +456,8 @@ def parse_args(args): '--checkpoint-name', type=str, default=None, - help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") + help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)" + ) return parser.parse_args(args) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 5188141e3..d251d040e 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -1,14 +1,13 @@ # Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause -from dataclasses import dataclass from argparse import Namespace +from dataclasses import dataclass import logging import shutil -import pytest - import numpy as np +import pytest from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args @@ -39,30 +38,32 @@ class ModelAndPpl: class UpdatableNamespace(Namespace): + def update(self, **kwargs): self.__dict__.update(**kwargs) -@pytest.fixture(scope="session", params=[ - ModelAndPpl( - name="hf-internal-testing/tiny-random-LlamaForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=True, - ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=True, - ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-MistralForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=False, - ), -]) +@pytest.fixture( + scope="session", + params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-LlamaForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-MistralForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=False, + ),]) def small_models_with_ppl(request): yield request.param @@ -77,22 +78,29 @@ def default_run_args(request): args.eval = True #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash args.weight_bit_width = 8 - args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". + args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". args.input_bit_width = 8 args.act_calibration = True return args -@pytest.fixture(params=[ +@pytest.fixture( + params=[ {}, - {"bias_corr": True}, - {"act_equalization": "layerwise"}, - {"act_equalization": "fx"}, - {"weight_equalization": True}, - {"gptq": True}, - {"ln_affine_merge": True}, - {"replace_mha": True}, - ]) + { + "bias_corr": True}, + { + "act_equalization": "layerwise"}, + { + "act_equalization": "fx"}, + { + "weight_equalization": True}, + { + "gptq": True}, + { + "ln_affine_merge": True}, + { + "replace_mha": True},]) def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param) From 25d79e3efe99b805bba7d2537476b35a1fcabfcd Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:13:53 +0100 Subject: [PATCH 06/53] test (ex/llm): reorg code a little bit. 
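
No functional change: UpdatableNamespace and requires_fx are grouped
with the other helpers, above the fixtures that use them.
UpdatableNamespace only adds dict-style updates on top of
argparse.Namespace, e.g. (values illustrative):

    args = UpdatableNamespace(**vars(parse_args([])))
    args.update(gptq=True, weight_bit_width=4)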
--- tests/brevitas_examples/llm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d251d040e..d8f5720d6 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,6 +25,12 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +class UpdatableNamespace(Namespace): + + def update(self, **kwargs): + self.__dict__.update(**kwargs) + + def requires_fx(args): return args.act_equalization == "fx" or args.weight_equalization or args.ln_affine_merge @@ -37,12 +43,6 @@ class ModelAndPpl: supports_fx: bool -class UpdatableNamespace(Namespace): - - def update(self, **kwargs): - self.__dict__.update(**kwargs) - - @pytest.fixture( scope="session", params=[ From e3575f4ac961f7cc8ef5739fce9a6a409ccbdb1e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:18:03 +0100 Subject: [PATCH 07/53] test (ex/llm): removed quant_ppl to model definition --- tests/brevitas_examples/llm.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d8f5720d6..a200b4386 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -39,7 +39,6 @@ def requires_fx(args): class ModelAndPpl: name: str float_ppl: float - quant_ppl: float supports_fx: bool @@ -49,19 +48,16 @@ class ModelAndPpl: ModelAndPpl( name="hf-internal-testing/tiny-random-LlamaForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=True, ), ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=True, ), ModelAndPpl( name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=False, ),]) def small_models_with_ppl(request): @@ -114,7 +110,6 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with args = toggle_run_args args.model = small_models_with_ppl.name exp_float_ppl = small_models_with_ppl.float_ppl - exp_quant_ppl = small_models_with_ppl.quant_ppl use_fx = requires_fx(args) if use_fx and not small_models_with_ppl.supports_fx: pytest.xfail(f"{small_models_with_ppl.name} does not support FX") From 6f8b18660440c7de7481010d5543360dc9c89d99 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:55:30 +0100 Subject: [PATCH 08/53] test (ex/llm): removed replace MHA from toggle tests --- tests/brevitas_examples/llm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index a200b4386..d40a62799 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -94,9 +94,7 @@ def default_run_args(request): { "gptq": True}, { - "ln_affine_merge": True}, - { - "replace_mha": True},]) + "ln_affine_merge": True},]) def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param) From a5eedeb50ef23828a8f9da8a4e2a9bd2008f2186 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:56:24 +0100 Subject: [PATCH 09/53] test (ex/llm): added accuracy tests for some basic configurations --- tests/brevitas_examples/llm.py | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d40a62799..f389b780d 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -112,3 +112,47 @@ def 
test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with if use_fx and not small_models_with_ppl.supports_fx: pytest.xfail(f"{small_models_with_ppl.name} does not support FX") float_ppl, quant_ppl, model = main(args) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "act_equalization": "layerwise", + "gptq": True, + "float_ppl": 31274.05078125, + "quant_ppl": 33139.23046875}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "act_equalization": "fx", + "bias_corr": True, + "float_ppl": 33239.5, + "quant_ppl": 33283.75390625}, + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "weight_equalization": True, + "ln_affine_merge": True, + "replace_mha": True, + "float_ppl": 50016.0, + "quant_ppl": 50016.0},]) +def acc_args_and_acc(default_run_args, request): + args = default_run_args + run_dict = request.param + float_ppl = run_dict["float_ppl"] + quant_ppl = run_dict["quant_ppl"] + del run_dict["float_ppl"] + del run_dict["quant_ppl"] + args.update(**run_dict) + yield args, float_ppl, quant_ppl + + +@pytest.mark.examples +@pytest.mark.weekly +def test_small_models_acc(caplog, acc_args_and_acc): + caplog.set_level(logging.INFO) + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + float_ppl, quant_ppl, model = main(args) + float_ppl = float_ppl.detach().cpu().numpy() + quant_ppl = quant_ppl.detach().cpu().numpy() + assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" From 89ffa008dcca9bc57e054e151b645312270c5a0f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 12:13:39 +0100 Subject: [PATCH 10/53] test (ex/llm): Added initial tests that layers are inserted properly. 
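
The new fixture/test pair runs the entry point and then verifies that
selected submodules were swapped for their quantized counterparts. The
core of the check is a walk over named_modules(), roughly as follows
(the key and expected type string are illustrative):

    for name, layer in model.named_modules():
        if name == "lm_head":
            assert "QuantLinear" in str(type(layer))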
--- tests/brevitas_examples/llm.py | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index f389b780d..4e4f90f3a 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,6 +25,15 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +def assert_layer_type(model, key, string): + for name, layer in model.named_modules(): + if name == key: + ltype = str(type(layer)) + assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" + return + assert False, f"Layer key: {key} not found" + + class UpdatableNamespace(Namespace): def update(self, **kwargs): @@ -156,3 +165,33 @@ def test_small_models_acc(caplog, acc_args_and_acc): quant_ppl = quant_ppl.detach().cpu().numpy() assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "quantize_last_layer": False, + "layer_key": "lm_head", + "layer_type": ""}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "quantize_last_layer": True, + "layer_key": "lm_head", + "layer_type": ""},]) +def layer_args(default_run_args, request): + args = default_run_args + layer_dict = request.param + layer_key = layer_dict["layer_key"] + layer_type = layer_dict["layer_type"] + del layer_dict["layer_key"] + del layer_dict["layer_type"] + args.update(**layer_dict) + yield args, layer_key, layer_type + + +def test_small_models_quant_layer(caplog, layer_args): + caplog.set_level(logging.INFO) + args, layer_key, layer_type = layer_args + float_ppl, quant_ppl, model = main(args) + assert_layer_type(model, layer_key, layer_type) From cb149b4a8fec60f5fde419ab58eee891388da0d7 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 12:44:45 +0100 Subject: [PATCH 11/53] test (ex/llm): allowed testing of multiple layer types --- tests/brevitas_examples/llm.py | 35 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 4e4f90f3a..4664523a0 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,13 +25,16 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) -def assert_layer_type(model, key, string): - for name, layer in model.named_modules(): - if name == key: - ltype = str(type(layer)) - assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" - return - assert False, f"Layer key: {key} not found" +def assert_layer_types(model, exp_layer_types): + for key, string in exp_layer_types.items(): + matched = False + for name, layer in model.named_modules(): + if name == key: + matched = True + ltype = str(type(layer)) + assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" + continue + assert matched, f"Layer key: {key} not found" class UpdatableNamespace(Namespace): @@ -172,26 +175,22 @@ def test_small_models_acc(caplog, acc_args_and_acc): { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": False, - "layer_key": "lm_head", - "layer_type": ""}, + "exp_layer_types": {"lm_head": ""}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", 
"quantize_last_layer": True, - "layer_key": "lm_head", - "layer_type": ""},]) + "exp_layer_types": {"lm_head": ""}},]) def layer_args(default_run_args, request): args = default_run_args layer_dict = request.param - layer_key = layer_dict["layer_key"] - layer_type = layer_dict["layer_type"] - del layer_dict["layer_key"] - del layer_dict["layer_type"] + exp_layer_types = layer_dict["exp_layer_types"] + del layer_dict["exp_layer_types"] args.update(**layer_dict) - yield args, layer_key, layer_type + yield args, exp_layer_types def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) - args, layer_key, layer_type = layer_args + args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) - assert_layer_type(model, layer_key, layer_type) + assert_layer_types(model, exp_layer_types) From 5e7a7e753acf203d9a82e732210d8912dbbfa1d4 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 13:20:56 +0100 Subject: [PATCH 12/53] test (ex/llm): Added extra layer replacement checks --- tests/brevitas_examples/llm.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 4664523a0..33f347699 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -28,13 +28,15 @@ def allexact(x, y): def assert_layer_types(model, exp_layer_types): for key, string in exp_layer_types.items(): matched = False + layer_names = [] for name, layer in model.named_modules(): + layer_names += [name] if name == key: matched = True ltype = str(type(layer)) assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" continue - assert matched, f"Layer key: {key} not found" + assert matched, f"Layer key: {key} not found in {layer_names}" class UpdatableNamespace(Namespace): @@ -174,8 +176,26 @@ def test_small_models_acc(caplog, acc_args_and_acc): params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", - "quantize_last_layer": False, - "exp_layer_types": {"lm_head": ""}}, + "exp_layer_types": { + "lm_head": "", + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "act_equalization": "layerwise", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.layer": "", + }}, + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "replace_mha": True, + "exp_layer_types": { + "model.decoder.layers.0.self_attn": "", + "model.decoder.layers.0.self_attn.mha": "", + }}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, From 9dd38aa73c2b44926ca9c2b80b9f3e7dfb9614ee Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 15:50:58 +0100 Subject: [PATCH 13/53] Fix (ex/gen): Bugfix when applying MX as activation quantization type --- src/brevitas_examples/common/generative/quantize.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/brevitas_examples/common/generative/quantize.py b/src/brevitas_examples/common/generative/quantize.py index 57670f6f6..73831d5fa 100644 --- a/src/brevitas_examples/common/generative/quantize.py +++ b/src/brevitas_examples/common/generative/quantize.py @@ -153,7 +153,8 @@ 'sym': Int8DynamicActPerGroupFloat}}}, 'po2_scale': { 'stats': { - 
'per_group': MXInt8Act}}}}, + 'per_group': { + 'sym': MXInt8Act}}}}}, 'float': { 'static': { 'float_scale': { @@ -175,7 +176,8 @@ 'dynamic': { 'po2_scale': { 'stats': { - 'per_group': MXFloat8e4m3Act}}}}, + 'per_group': { + 'sym': MXFloat8e4m3Act}}}}}, 'float_fnuz': { 'static': { 'float_scale': { From 9940a9f9aa6de86c54462fa181fdfcc438f71c53 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:44:36 +0100 Subject: [PATCH 14/53] test (ex/llm): Added tests for FP8, MX datatypes --- tests/brevitas_examples/llm.py | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 33f347699..f261700cc 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -182,6 +182,51 @@ def test_small_models_acc(caplog, acc_args_and_acc): "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "weight_quant_format": "float_ocp_e4m3", + "weight_quant_type": "sym", + "input_quant_format": "float_ocp_e5m2", + "input_quant_type": "sym", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "weight_quant_format": "float_fnuz_e4m3", + "weight_quant_type": "sym", + "input_quant_format": "float_fnuz_e5m2", + "input_quant_type": "sym", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "weight_quant_format": "float_ocp_e4m3", + "weight_scale_precision": "po2_scale", + "weight_param_method": "stats", + "weight_quant_granularity": "per_group", + "weight_group_size": 16, + "weight_quant_type": "sym", + "input_quant_format": "float_ocp_e5m2", + "input_scale_type": "dynamic", + "input_scale_precision": "po2_scale", + "input_param_method": "stats", + "input_quant_granularity": "per_group", + "input_group_size": 16, + "input_quant_type": "sym", + "act_calibration": False, + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": "", + }}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", "act_equalization": "layerwise", From b84a645603b51e14b64390d64a07c47d7641f79d Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:50:08 +0100 Subject: [PATCH 15/53] setup (ex/llm): Renamed requirements to match other examples. 
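
With the rename, the example's dependencies install through the same
extras mechanism as the other examples, e.g. (assuming an editable
checkout of the repository):

    pip install -e .[llm]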
--- .../{requirements-example-llm.txt => requirements-llm.txt} | 0 setup.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename requirements/{requirements-example-llm.txt => requirements-llm.txt} (100%) diff --git a/requirements/requirements-example-llm.txt b/requirements/requirements-llm.txt similarity index 100% rename from requirements/requirements-example-llm.txt rename to requirements/requirements-llm.txt diff --git a/setup.py b/setup.py index 817c7f88e..4a756962d 100644 --- a/setup.py +++ b/setup.py @@ -39,10 +39,10 @@ def read_requirements(filename): "test": read_requirements('requirements-test.txt'), "tts": read_requirements('requirements-tts.txt'), "stt": read_requirements('requirements-stt.txt'), + "llm": read_requirements('requirements-llm.txt'), "vision": read_requirements('requirements-vision.txt'), "finn_integration": read_requirements('requirements-finn-integration.txt'), - "ort_integration": read_requirements('requirements-ort-integration.txt'), - "example_llm": read_requirements('requirements-example-llm.txt')}, + "ort_integration": read_requirements('requirements-ort-integration.txt')}, packages=find_packages('src'), package_dir={'': 'src'}, zip_safe=False, From 9f23bfb90ba12b06abd956911b90b7c7c6319ad3 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:55:06 +0100 Subject: [PATCH 16/53] req (ex/llm): Added explicit dependencies --- requirements/requirements-llm.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements/requirements-llm.txt b/requirements/requirements-llm.txt index a1a2c615f..7070cc9c6 100644 --- a/requirements/requirements-llm.txt +++ b/requirements/requirements-llm.txt @@ -1 +1,3 @@ optimum-amd[brevitas] @ git+https://github.com/huggingface/optimum-amd.git@main +tqdm +transformers From af6af4bd9561e56becf821888b864f55331ed3d5 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 17:18:33 +0100 Subject: [PATCH 17/53] test (ex/llm): added weight-only test --- tests/brevitas_examples/llm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index f261700cc..94297343f 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -182,6 +182,15 @@ def test_small_models_acc(caplog, acc_args_and_acc): "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "input_bit_width": None, + "act_calibration": False, + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_ocp_e4m3", From 70145da6d0d19b28efd27174008817058f580e51 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 17:33:47 +0100 Subject: [PATCH 18/53] precommit --- tests/brevitas_examples/llm.py | 76 ++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 94297343f..675bf2df2 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -177,20 +177,25 @@ def test_small_models_acc(caplog, acc_args_and_acc): { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "exp_layer_types": { - "lm_head": "", - 
"model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "lm_head": + "", + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "input_bit_width": None, "act_calibration": False, "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_ocp_e4m3", @@ -198,10 +203,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_format": "float_ocp_e5m2", "input_quant_type": "sym", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_fnuz_e4m3", @@ -209,10 +216,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_format": "float_fnuz_e5m2", "input_quant_type": "sym", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", "weight_quant_format": "float_ocp_e4m3", @@ -230,30 +239,37 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_type": "sym", "act_calibration": False, "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": + "",}}, { "model": 
"hf-internal-testing/tiny-random-LlamaForCausalLM", "act_equalization": "layerwise", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.layer": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.layer": + "",}}, { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", "replace_mha": True, "exp_layer_types": { - "model.decoder.layers.0.self_attn": "", - "model.decoder.layers.0.self_attn.mha": "", - }}, + "model.decoder.layers.0.self_attn": + "", + "model.decoder.layers.0.self_attn.mha": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, - "exp_layer_types": {"lm_head": ""}},]) + "exp_layer_types": { + "lm_head": ""}},]) def layer_args(default_run_args, request): args = default_run_args layer_dict = request.param From f0ec89e076af47f4bc596121d0fa326707fad86c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 18:19:43 +0100 Subject: [PATCH 19/53] Feat (ex/llm): Allow supplying a prefix for the exported ONNX/TS model --- src/brevitas_examples/llm/main.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 6c3c34dd5..6060ef498 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -63,16 +63,15 @@ def model_export(model, ref_input, args): export_manager = StdQCDQONNXManager export_manager.change_weight_export(export_weight_q_node=True) - print(f"Exporting the model in ./quantized_onnx/{args.model.replace('/', '-')}") + print(f"Exporting the model in ./{args.export_prefix}") with torch.no_grad(), brevitas_proxy_export_mode(model, export_manager=export_manager): onnx_export_from_model( model, - f"./quantized_onnx/{args.model.replace('/', '-')}", + f"./{args.export_prefix}", task="text-generation-with-past", do_validation=False) elif args.export_target == 'torch_qcdq': - export_torch_qcdq( - model, ref_input['input_ids'], export_path=f"{args.model.replace('/', '-')}.pt") + export_torch_qcdq(model, ref_input['input_ids'], export_path=f"{args.export_prefix}.pt") def validate(args): @@ -115,6 +114,9 @@ def main(args): validate(args) set_seed(args.seed) + if args.export_prefix is None: + args.export_prefix = f"{args.model.replace('/', '--')}" + if args.no_float16: dtype = torch.float32 else: @@ -452,6 +454,13 @@ def parse_args(args): 'sharded_torchmlir_group_weight', 'sharded_packed_torchmlir_group_weight'], help='Model export.') + parser.add_argument( + '--export-prefix', + type=str, + default=None, + help= + "Path prefix to use for the various export flows. If None, a path will be derived from the model name (default: %(default)s)" + ) parser.add_argument( '--checkpoint-name', type=str, From c9d9b666a00a99977a5e0b54dba792f7bd2ca59c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 18:20:33 +0100 Subject: [PATCH 20/53] test (ex/llm): Added ONNX export and torchscript tests. 
--- tests/brevitas_examples/llm.py | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 675bf2df2..a05a2e2c5 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -4,10 +4,13 @@ from argparse import Namespace from dataclasses import dataclass import logging +import os import shutil import numpy as np +import onnx import pytest +import torch from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args @@ -87,6 +90,7 @@ def default_run_args(request): args.dataset = "c4" args.eval = True #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash + args.export_prefix = ptid2pathname(request.node.nodeid) args.weight_bit_width = 8 args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". args.input_bit_width = 8 @@ -284,3 +288,60 @@ def test_small_models_quant_layer(caplog, layer_args): args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) assert_layer_types(model, exp_layer_types) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "quantize_weight_zero_point": True, + "quantize_input_zero_point": True, + "export_target": "onnx_qcdq",}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "weight_quant_type": "sym", + "input_quant_type": "sym", + "export_target": "onnx_qcdq",},]) +def onnx_export_args(default_run_args, request): + args = default_run_args + export_dict = request.param + args.update(**export_dict) + yield args + + +def test_small_models_onnx_export(caplog, onnx_export_args): + caplog.set_level(logging.INFO) + args = onnx_export_args + float_ppl, quant_ppl, model = main(args) + onnx_model = onnx.load(os.path.join(args.export_prefix, "model.onnx")) + shutil.rmtree(args.export_prefix) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "eval": False, + "quantize_weight_zero_point": True, + "quantize_input_zero_point": True, + "export_target": "torch_qcdq",}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "eval": False, + "weight_quant_type": "sym", + "input_quant_type": "sym", + "export_target": "torch_qcdq",},]) +def torch_export_args(default_run_args, request): + args = default_run_args + export_dict = request.param + args.update(**export_dict) + yield args + + +def test_small_models_torch_export(caplog, torch_export_args): + caplog.set_level(logging.INFO) + args = torch_export_args + float_ppl, quant_ppl, model = main(args) + filepath = args.export_prefix + ".pt" + torchscript_model = torch.jit.load(filepath) + os.remove(filepath) From 9a6e50916c01bbd189b80a95eb1c36e2673746c6 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:22:37 +0100 Subject: [PATCH 21/53] test (ex/llm): marked llm tests. 
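
Replacing the generic examples/weekly markers with a dedicated llm
marker lets the LLM suite be selected (or excluded) on its own, e.g.:

    pytest -m llm tests/brevitas_examples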
--- tests/brevitas_examples/llm.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index a05a2e2c5..138591606 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -119,8 +119,7 @@ def toggle_run_args(default_run_args, request): yield args -@pytest.mark.examples -@pytest.mark.weekly +@pytest.mark.llm def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -164,8 +163,7 @@ def acc_args_and_acc(default_run_args, request): yield args, float_ppl, quant_ppl -@pytest.mark.examples -@pytest.mark.weekly +@pytest.mark.llm def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc @@ -283,6 +281,7 @@ def layer_args(default_run_args, request): yield args, exp_layer_types +@pytest.mark.llm def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args @@ -309,6 +308,7 @@ def onnx_export_args(default_run_args, request): yield args +@pytest.mark.llm def test_small_models_onnx_export(caplog, onnx_export_args): caplog.set_level(logging.INFO) args = onnx_export_args @@ -338,6 +338,7 @@ def torch_export_args(default_run_args, request): yield args +@pytest.mark.llm def test_small_models_torch_export(caplog, torch_export_args): caplog.set_level(logging.INFO) args = torch_export_args From c39749bce5554155fe08e024136b3b26fe20a25a Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:33:02 +0100 Subject: [PATCH 22/53] test (ex/llm): Added commented-out mixtral model --- tests/brevitas_examples/llm.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 138591606..3b70d266f 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -76,7 +76,13 @@ class ModelAndPpl: name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, supports_fx=False, - ),]) + ), + #ModelAndPpl( + # name="dacorvo/Mixtral-tiny", + # float_ppl=None, + # supports_fx=True, + #), + ]) def small_models_with_ppl(request): yield request.param From f214f7c53cf1904245b37554defe3f294af78178 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:40:02 +0100 Subject: [PATCH 23/53] test (ex/llm): Renamed llm test file. 
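
pytest only collects files matching test_*.py by default, so the
rename makes the module discoverable without extra configuration:

    pytest tests/brevitas_examples/test_llm.py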
--- tests/brevitas_examples/{llm.py => test_llm.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/brevitas_examples/{llm.py => test_llm.py} (100%) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/test_llm.py similarity index 100% rename from tests/brevitas_examples/llm.py rename to tests/brevitas_examples/test_llm.py From 852714f90937f20ca6a03f14c85c215b221b5f46 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:19:12 +0100 Subject: [PATCH 24/53] test (ex/llm): improved test compatibility with test infrastructure --- noxfile.py | 12 +++++++++++- pytest.ini | 3 +++ tests/brevitas_examples/test_llm.py | 15 ++++++++++++--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index ffb1c5fbd..7af395105 100644 --- a/noxfile.py +++ b/noxfile.py @@ -105,7 +105,17 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '-k', 'not llm', 'tests/brevitas_examples') + + +@nox.session(python=PYTHON_VERSIONS) +@nox.parametrize("pytorch", PYTORCH_VERSIONS, ids=PYTORCH_IDS) +@nox.parametrize("jit_status", JIT_STATUSES, ids=JIT_IDS) +def tests_brevitas_examples_llm(session, pytorch, jit_status): + session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) + install_pytorch(pytorch, session) + session.install('--upgrade', '-e', '.[test, llm, export]') + session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) diff --git a/pytest.ini b/pytest.ini index a560d3e16..a3cd14b59 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,3 +7,6 @@ log_cli_format = %(message)s # pytest-mock should use Pypi's mock rather than Python's built-in mock_use_standalone_module = true + +markers = + llm: mark a test which tests brevitas_examples/llm diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 3b70d266f..a646764f4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,16 +12,19 @@ import pytest import torch +from brevitas import config from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args +from tests.marker import jit_disabled_for_export + def ptid2pathname(string): return string.replace("/", "-").replace(":", "-") def allclose(x, y): - return np.allclose(x, y, rtol=1e-02, atol=5e-01, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) def allexact(x, y): @@ -176,8 +179,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + if config.JIT_ENABLED: + assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + else: + assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert 
allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest.fixture( From c778818946ddb6fe223127176a00d0b39361ee4e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:25:29 +0100 Subject: [PATCH 25/53] precommit --- tests/brevitas_examples/test_llm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index a646764f4..3a362b06e 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -15,7 +15,6 @@ from brevitas import config from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args - from tests.marker import jit_disabled_for_export @@ -85,7 +84,7 @@ class ModelAndPpl: # float_ppl=None, # supports_fx=True, #), - ]) + ]) def small_models_with_ppl(request): yield request.param From c659e64aec6d62e58933f7055cfbf4b95676121b Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:27:13 +0100 Subject: [PATCH 26/53] test (gha/ex/llm): Added new workflow for testing the LLM example --- .github/workflows/examples_llm_pytest.yml | 65 +++++++++++++++++++ .github/workflows/gen_github_actions.py | 26 ++++++++ .../workflows/reduced_examples_llm_pytest.yml | 64 ++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 .github/workflows/examples_llm_pytest.yml create mode 100644 .github/workflows/reduced_examples_llm_pytest.yml diff --git a/.github/workflows/examples_llm_pytest.yml b/.github/workflows/examples_llm_pytest.yml new file mode 100644 index 000000000..bc7f82287 --- /dev/null +++ b/.github/workflows/examples_llm_pytest.yml @@ -0,0 +1,65 @@ +name: Examples LLM Pytest + +on: + push: + branches: [ master, dev ] + pull_request: + types: + - review_requested + +jobs: + build: + runs-on: ${{ matrix.platform }} + strategy: + fail-fast: false + + + matrix: + python_version: ['3.8', '3.9'] + pytorch_version: ['1.9.1', '1.10.1', '1.11.0', '1.12.1', '1.13.0', '2.0.1', '2.1.0'] + platform: ['windows-latest', 'ubuntu-latest', 'macos-latest'] + jit_status: ['jit_disabled', 'jit_enabled'] + + + exclude: + - pytorch_version: '1.9.1' + platform: 'macos-latest' + + - pytorch_version: '1.9.1' + jit_status: 'jit_enabled' + + + + if: ${{ !github.event.pull_request.draft }} + steps: + + - name: Checkout repo + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python_version }} + + - name: Install Nox dependencies + shell: bash + run: pip install -r requirements/requirements-nox.txt + + - name: Install update + shell: bash + run: sudo apt-get update + if: startsWith(runner.os, 'Linux') == true + + - name: Install libsndfile and libgomp1 on Ubuntu + shell: bash + run: sudo apt-get install -y libsndfile-dev libgomp1 + if: startsWith(runner.os, 'Linux') == true + + - name: Install libomp on macOS + shell: bash + run: brew install libomp + if: startsWith(runner.os, 'macOS') == true + + - name: Run Nox session for brevitas_examples pytest + shell:
bash + run: nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index 4cd6c6827..d491795ca 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -8,6 +8,7 @@ BASE_YML_REDUCED_TEMPLATE = 'base_reduced.yml.template' PYTEST_YML = 'pytest.yml' EXAMPLES_PYTEST_YML = 'examples_pytest.yml' +EXAMPLES_LLM_PYTEST_YML = 'examples_llm_pytest.yml' DEVELOP_INSTALL_YML = 'develop_install.yml' FINN_INTEGRATION_YML = 'finn_integration.yml' ORT_INTEGRATION_YML = 'ort_integration.yml' @@ -80,6 +81,13 @@ 'nox -v -s tests_brevitas_examples_cpu-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\)' )]),] +EXAMPLES_LLM_PYTEST_STEP_LIST = [ + od([('name', 'Run Nox session for brevitas_examples pytest'), ('shell', 'bash'), + ( + 'run', + 'nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\)' + )]),] + FINN_INTEGRATION_STEP_LIST = [ od([('name', 'Install protobuf on Ubuntu'), ('shell', 'bash'), ('run', 'sudo apt-get install protobuf-compiler libprotoc-dev'), @@ -167,6 +175,23 @@ def gen_examples_pytest_yml(): pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_PYTEST_YML) +def gen_examples_llm_pytest_yml(): + pytest = Action( + 'Examples LLM Pytest', + EXCLUDE_LIST + JIT_EXCLUDE_LIST, + combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]), + EXAMPLES_LLM_PYTEST_STEP_LIST, + STRATEGY) + pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_LLM_PYTEST_YML) + pytest = Action( + 'Examples LLM Pytest', + EXCLUDE_LIST, + combine_od_list([MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), + EXAMPLES_LLM_PYTEST_STEP_LIST, + STRATEGY) + pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_LLM_PYTEST_YML) + + def gen_test_develop_install_yml(): test_develop_install = Action( 'Test develop install', EXCLUDE_LIST, MATRIX, TEST_INSTALL_DEV_STEP_LIST, STRATEGY) @@ -243,6 +268,7 @@ def gen_test_brevitas_end_to_end(): if __name__ == '__main__': gen_pytest_yml() gen_examples_pytest_yml() + gen_examples_llm_pytest_yml() gen_test_develop_install_yml() gen_test_brevitas_finn_integration() gen_test_brevitas_ort_integration() diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml new file mode 100644 index 000000000..37f8aea46 --- /dev/null +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -0,0 +1,64 @@ +name: Examples LLM Pytest + +on: + pull_request: + types: + - opened + - reopened + - synchronize + - ready_for_review + + +jobs: + build: + runs-on: ${{ matrix.platform }} + strategy: + fail-fast: false + + + matrix: + python_version: ['3.8'] + pytorch_version: ['1.9.1', '1.13.0', '2.1.0'] + platform: ['ubuntu-latest'] + jit_status: ['jit_disabled'] + + + exclude: + - pytorch_version: '1.9.1' + platform: 'macos-latest' + + + + if: ${{ !github.event.pull_request.draft }} + steps: + + - name: Checkout repo + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python_version }} + + - name: Install Nox dependencies + shell: bash + run: pip install -r requirements/requirements-nox.txt + + - name: Install update + shell: bash + run: sudo apt-get update + if: startsWith(runner.os, 'Linux') == true + + - name: Install libsndfile and libgomp1 on Ubuntu + 
shell: bash + run: sudo apt-get install -y libsndfile-dev libgomp1 + if: startsWith(runner.os, 'Linux') == true + + - name: Install libomp on macOS + shell: bash + run: brew install libomp + if: startsWith(runner.os, 'macOS') == true + + - name: Run Nox session for brevitas_examples pytest + shell: bash + run: nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\) From 1deb288d6a5131cf1363dcf2cea910e03ee4e2fd Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:32:31 +0100 Subject: [PATCH 27/53] test (ex/llm): Removed onnx dependency when collecting tests. --- tests/brevitas_examples/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 3a362b06e..90279a8af 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -8,7 +8,6 @@ import shutil import numpy as np -import onnx import pytest import torch @@ -323,6 +322,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): + import onnx caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From 7b637233f608decd487bcf2f781d9aca66854b9c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:52:44 +0100 Subject: [PATCH 28/53] test (ex/llm): softened accuracy conditions --- tests/brevitas_examples/test_llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 90279a8af..09f5db9fe 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -25,6 +25,10 @@ def allclose(x, y): return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) +def allveryclose(x, y): + return np.allclose(x, y, rtol=1e-08, atol=1e-01, equal_nan=False) + + def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) @@ -181,8 +185,8 @@ def test_small_models_acc(caplog, acc_args_and_acc): assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" else: - assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest.fixture( From 0235b5c5da266622c2f99831cb4c57ec2b7d8f9f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:01:01 +0100 Subject: [PATCH 29/53] test (ex/llm): reorganise to prevent export issues --- tests/brevitas_examples/test_llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 09f5db9fe..c03c71f76 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,8 +12,6 @@ import torch from brevitas import config -from brevitas_examples.llm.main import main -from brevitas_examples.llm.main import parse_args from tests.marker 
import jit_disabled_for_export @@ -94,6 +92,7 @@ def small_models_with_ppl(request): @pytest.fixture() def default_run_args(request): + from brevitas_examples.llm.main import parse_args args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 args.seqlen = 2 @@ -132,6 +131,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = toggle_run_args args.model = small_models_with_ppl.name @@ -176,6 +176,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm def test_small_models_acc(caplog, acc_args_and_acc): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc float_ppl, quant_ppl, model = main(args) @@ -298,6 +299,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm def test_small_models_quant_layer(caplog, layer_args): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) @@ -326,6 +328,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): + from brevitas_examples.llm.main import main import onnx caplog.set_level(logging.INFO) args = onnx_export_args @@ -358,6 +361,7 @@ def torch_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_torch_export(caplog, torch_export_args): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = torch_export_args float_ppl, quant_ppl, model = main(args) From bedf1b32271ee85666a2ebe238199cc17e575877 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:04:12 +0100 Subject: [PATCH 30/53] precommit --- tests/brevitas_examples/test_llm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index c03c71f76..67db55bc2 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -328,8 +328,9 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): - from brevitas_examples.llm.main import main import onnx + + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From 79c271a52fe774cbc168540898f5611851673c37 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:08:35 +0100 Subject: [PATCH 31/53] test (ex/llm): remove upgrade flag to prevent new pytorch from being installed --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 7af395105..43ada0632 100644 --- a/noxfile.py +++ b/noxfile.py @@ -114,7 +114,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) - session.install('--upgrade', '-e', '.[test, llm, export]') + session.install('-e', '.[test, llm, export]') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') From 44d2e56da96f86f9c97f7284d0e0a073b0ac4303 Mon Sep 17 00:00:00 2001 From: Nick 
Fraser Date: Fri, 23 Aug 2024 13:41:57 +0100 Subject: [PATCH 32/53] test (gha/ex/llm): Added custom pytorch versions for LLM tests. --- .github/workflows/examples_llm_pytest.yml | 2 +- .github/workflows/gen_github_actions.py | 12 ++++++++++-- .github/workflows/reduced_examples_llm_pytest.yml | 2 +- tests/brevitas_examples/test_llm.py | 5 +++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/examples_llm_pytest.yml b/.github/workflows/examples_llm_pytest.yml index bc7f82287..e939a93b2 100644 --- a/.github/workflows/examples_llm_pytest.yml +++ b/.github/workflows/examples_llm_pytest.yml @@ -16,7 +16,7 @@ jobs: matrix: python_version: ['3.8', '3.9'] - pytorch_version: ['1.9.1', '1.10.1', '1.11.0', '1.12.1', '1.13.0', '2.0.1', '2.1.0'] + pytorch_version: ['2.2.2', '2.3.1', '2.4.0'] platform: ['windows-latest', 'ubuntu-latest', 'macos-latest'] jit_status: ['jit_disabled', 'jit_enabled'] diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index d491795ca..f24ab793b 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -26,6 +26,10 @@ ('pytorch_version', list(PYTORCH_LIST_REDUCED)), ('platform', PLATFORM_LIST_REDUCED)]) +EXAMPLES_LLM_PYTEST_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), + ('pytorch_version', list( + ('2.2.2',))), ('platform', PLATFORM_LIST_REDUCED)]) + FINN_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list(PYTORCH_LIST_REDUCED)), ('platform', PLATFORM_LIST_REDUCED)]) @@ -62,6 +66,10 @@ MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', PLATFORM_LIST)]) +EXAMPLES_LLM_PYTEST_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), + ('pytorch_version', list( + ('2.2.2', '2.3.1', '2.4.0'))), ('platform', PLATFORM_LIST)]) + FINN_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', FINN_PLATFORM_LIST)]) @@ -179,14 +187,14 @@ def gen_examples_llm_pytest_yml(): pytest = Action( 'Examples LLM Pytest', EXCLUDE_LIST + JIT_EXCLUDE_LIST, - combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]), + combine_od_list([EXAMPLES_LLM_PYTEST_MATRIX, PYTEST_MATRIX_EXTRA]), EXAMPLES_LLM_PYTEST_STEP_LIST, STRATEGY) pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_LLM_PYTEST_YML) pytest = Action( 'Examples LLM Pytest', EXCLUDE_LIST, - combine_od_list([MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), + combine_od_list([EXAMPLES_LLM_PYTEST_MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), EXAMPLES_LLM_PYTEST_STEP_LIST, STRATEGY) pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_LLM_PYTEST_YML) diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml index 37f8aea46..ca7fa1dc6 100644 --- a/.github/workflows/reduced_examples_llm_pytest.yml +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -18,7 +18,7 @@ jobs: matrix: python_version: ['3.8'] - pytorch_version: ['1.9.1', '1.13.0', '2.1.0'] + pytorch_version: ['2.2.2'] platform: ['ubuntu-latest'] jit_status: ['jit_disabled'] diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 67db55bc2..66c09d448 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -130,6 +130,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, 
toggle_run_args, small_models_with_ppl): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -175,6 +176,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_acc(caplog, acc_args_and_acc): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -298,6 +300,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -327,6 +330,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() +@requires_pt_ge('2.2') def test_small_models_onnx_export(caplog, onnx_export_args): import onnx @@ -361,6 +365,7 @@ def torch_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() +@requires_pt_ge('2.2') def test_small_models_torch_export(caplog, torch_export_args): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) From 32ea1ef3d61fa0b4ddcbbdcc1d91a604148cc496 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:49:40 +0100 Subject: [PATCH 33/53] test (ex/llm/gha): Fixed custom pytorch versions for LLM test --- .github/workflows/gen_github_actions.py | 5 +++-- noxfile.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index f24ab793b..316d21a4d 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -66,9 +66,10 @@ MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', PLATFORM_LIST)]) +EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS = ('2.2.2', '2.3.1', '2.4.0') EXAMPLES_LLM_PYTEST_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), - ('pytorch_version', list( - ('2.2.2', '2.3.1', '2.4.0'))), ('platform', PLATFORM_LIST)]) + ('pytorch_version', list(EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS)), + ('platform', PLATFORM_LIST)]) FINN_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', FINN_PLATFORM_LIST)]) diff --git a/noxfile.py b/noxfile.py index 43ada0632..93fc194e4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -9,6 +9,7 @@ from packaging import version sys.path.append(os.path.join(os.path.dirname(__file__), os.path.join('.', '.github', 'workflows'))) +from gen_github_actions import EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS from gen_github_actions import JIT_STATUSES from gen_github_actions import PYTHON_VERSIONS from gen_github_actions import PYTORCH_VERSIONS @@ -16,6 +17,8 @@ IS_OSX = system() == 'Darwin' PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/torch_stable.html' PYTORCH_IDS = tuple([f'pytorch_{i}' for i in PYTORCH_VERSIONS]) +EXAMPLES_LLM_PYTEST_PYTORCH_IDS = tuple([ + f'pytorch_{i}' for i in EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS]) JIT_IDS = tuple([f'{i}'.lower() for i in JIT_STATUSES]) LSTM_EXPORT_MIN_PYTORCH = '1.10.1' @@ -109,7 +112,8 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): @nox.session(python=PYTHON_VERSIONS) -@nox.parametrize("pytorch", PYTORCH_VERSIONS, ids=PYTORCH_IDS) +@nox.parametrize( + "pytorch", EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS, ids=EXAMPLES_LLM_PYTEST_PYTORCH_IDS) @nox.parametrize("jit_status", JIT_STATUSES, ids=JIT_IDS) def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = 
'{}'.format(int(jit_status == 'jit_enabled')) From 0a8d7ee51a232cc161cf74b39277088bad3b7719 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:57:24 +0100 Subject: [PATCH 34/53] Bugfixes --- noxfile.py | 3 ++- tests/brevitas_examples/test_llm.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 93fc194e4..77b90a9b5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -118,7 +118,8 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) - session.install('-e', '.[test, llm, export]') + session.install( + '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 66c09d448..9c39b878e 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -13,6 +13,7 @@ from brevitas import config from tests.marker import jit_disabled_for_export +from tests.marker import requires_pt_ge def ptid2pathname(string): From b3bcedd9b702a9beff2d571676bb2366d05d1838 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:05:43 +0100 Subject: [PATCH 35/53] test (ex/llm/nox): Better exclusion for examples. --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 77b90a9b5..ff0ea3d29 100644 --- a/noxfile.py +++ b/noxfile.py @@ -108,7 +108,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', '-k', 'not llm', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '--ignore-glob', 'tests/brevitas_examples/*llm*', 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) @@ -120,7 +120,7 @@ def tests_brevitas_examples_llm(session, pytorch, jit_status): install_pytorch(pytorch, session) session.install( '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') - session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') @nox.session(python=PYTHON_VERSIONS) From 47d7cbbc8cb37ac0026ee9fe116decfda00bd737 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:06:36 +0100 Subject: [PATCH 36/53] Revert "test (ex/llm): reorganise to prevent export issues" This reverts commit a9d63613e7f67ee726c2884949d102ae00dc4a9c. 
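The function-level imports below were introduced so that collecting test_llm.py did not require the `llm` extras to be installed. Now that the LLM tests run in a dedicated nox session that installs `.[test, llm, export]`, the workaround appears unnecessary and the imports return to module level. The pattern being reverted looked like this:

    def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl):
        from brevitas_examples.llm.main import main  # deferred to keep collection import-free
        caplog.set_level(logging.INFO)
        ...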
--- tests/brevitas_examples/test_llm.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9c39b878e..9e12fc3cf 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,6 +12,8 @@ import torch from brevitas import config +from brevitas_examples.llm.main import main +from brevitas_examples.llm.main import parse_args from tests.marker import jit_disabled_for_export from tests.marker import requires_pt_ge @@ -93,7 +95,6 @@ def small_models_with_ppl(request): @pytest.fixture() def default_run_args(request): - from brevitas_examples.llm.main import parse_args args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 args.seqlen = 2 @@ -133,7 +134,6 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = toggle_run_args args.model = small_models_with_ppl.name @@ -179,7 +179,6 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_acc(caplog, acc_args_and_acc): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc float_ppl, quant_ppl, model = main(args) @@ -303,7 +302,6 @@ def layer_args(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) @@ -368,7 +366,6 @@ def torch_export_args(default_run_args, request): @jit_disabled_for_export() @requires_pt_ge('2.2') def test_small_models_torch_export(caplog, torch_export_args): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = torch_export_args float_ppl, quant_ppl, model = main(args) From 9e3638b417ac1714b877be4ab732172cec1a4d38 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:07:14 +0100 Subject: [PATCH 37/53] Revert "test (ex/llm): Removed onnx dependency when collecting tests." This reverts commit d4509c6e8cc56188b6b4f64af52c55fb4f323f79. 
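Same rationale as the previous revert: the dedicated nox session installs the `export` extras (and onnx with them), so a module-level `import onnx` no longer breaks test collection. The deferred variant being removed looked like this:

    def test_small_models_onnx_export(caplog, onnx_export_args):
        import onnx  # deferred so collection succeeded without the export extras
        ...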
--- tests/brevitas_examples/test_llm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9e12fc3cf..b0eae9dcf 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -8,6 +8,7 @@ import shutil import numpy as np +import onnx import pytest import torch @@ -331,9 +332,6 @@ def onnx_export_args(default_run_args, request): @jit_disabled_for_export() @requires_pt_ge('2.2') def test_small_models_onnx_export(caplog, onnx_export_args): - import onnx - - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From a60911e2ee5a67b02dad91267c1118b1b2310c95 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:08:23 +0100 Subject: [PATCH 38/53] Precommit --- noxfile.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index ff0ea3d29..2424e655a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -108,7 +108,13 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', '--ignore-glob', 'tests/brevitas_examples/*llm*', 'tests/brevitas_examples') + session.run( + 'pytest', + '-n', + 'logical', + '--ignore-glob', + 'tests/brevitas_examples/*llm*', + 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) From a48c74e8f5f86c7aef01cdde6b51ce104ba7bb1e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:14:22 +0100 Subject: [PATCH 39/53] test (nox): Updated pytorch torchvision version list --- noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2424e655a..27c469592 100644 --- a/noxfile.py +++ b/noxfile.py @@ -29,7 +29,10 @@ '1.12.1': '0.13.1', '1.13.0': '0.14.0', '2.0.1': '0.15.2', - '2.1.0': '0.16.0'} + '2.1.0': '0.16.0', + '2.2.2': '0.17.2', + '2.3.1': '0.18.1', + '2.4.0': '0.19.0'} PARSED_TORCHVISION_VERSION_DICT = {version.parse(k): v for k, v in TORCHVISION_VERSION_DICT.items()} @@ -124,6 +127,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) + install_torchvision(pytorch, session) # Optimum seems to require torchvision session.install( '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') From 2c5e01adf1efe4d51b353cb4e47c3589c96e8550 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:34:49 +0100 Subject: [PATCH 40/53] Updated install method for PT>=2.4 --- noxfile.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/noxfile.py b/noxfile.py index 27c469592..8ffdd0c53 100644 --- a/noxfile.py +++ b/noxfile.py @@ -7,6 +7,7 @@ import nox from packaging import version +from packaging.version import parse sys.path.append(os.path.join(os.path.dirname(__file__), os.path.join('.', '.github', 'workflows'))) from gen_github_actions import EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS @@ -15,7 +16,8 @@ from gen_github_actions import PYTORCH_VERSIONS IS_OSX = system() == 'Darwin' 
-PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/torch_stable.html' +PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/cpu' +PYTORCH_STABLE_WHEEL_SRC_LEGACY = 'https://download.pytorch.org/whl/torch_stable.html' PYTORCH_IDS = tuple([f'pytorch_{i}' for i in PYTORCH_VERSIONS]) EXAMPLES_LLM_PYTEST_PYTORCH_IDS = tuple([ f'pytorch_{i}' for i in EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS]) @@ -39,7 +41,11 @@ def install_pytorch(pytorch, session): if not IS_OSX: - cmd = [f'torch=={pytorch}+cpu', '-f', PYTORCH_STABLE_WHEEL_SRC] + if parse(pytorch) < parse('2.4.0'): + cmd = [f'torch=={pytorch}+cpu', '-f', PYTORCH_STABLE_WHEEL_SRC_LEGACY] + else: + cmd = [f'torch=={pytorch}', '--index-url', PYTORCH_STABLE_WHEEL_SRC] + else: cmd = [f'torch=={pytorch}'] session.install(*cmd) @@ -48,11 +54,18 @@ def install_pytorch(pytorch, session): def install_torchvision(pytorch, session): torchvision = PARSED_TORCHVISION_VERSION_DICT[version.parse(pytorch)] if not IS_OSX: - cmd = [ - f'torch=={pytorch}+cpu', # make sure correct pytorch version is kept - f'torchvision=={torchvision}+cpu', - '-f', - PYTORCH_STABLE_WHEEL_SRC] + if parse(pytorch) < parse('2.4.0'): + cmd = [ + f'torch=={pytorch}+cpu', # make sure correct pytorch version is kept + f'torchvision=={torchvision}+cpu', + '-f', + PYTORCH_STABLE_WHEEL_SRC_LEGACY] + else: + cmd = [ + f'torch=={pytorch}', + f'torchvision=={torchvision}', + '--index-url', + PYTORCH_STABLE_WHEEL_SRC] else: cmd = [f'torch=={pytorch}', f'torchvision=={torchvision}'] session.install(*cmd) From 36c60377b27ccbd6ef738d4a2dc4134141fd53a0 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:41:14 +0100 Subject: [PATCH 41/53] test (llm/nox/gha): Updated env setup --- .github/workflows/gen_github_actions.py | 2 +- .github/workflows/reduced_examples_llm_pytest.yml | 2 +- noxfile.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index 316d21a4d..2c4908a6c 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -28,7 +28,7 @@ EXAMPLES_LLM_PYTEST_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list( - ('2.2.2',))), ('platform', PLATFORM_LIST_REDUCED)]) + ('2.4.0',))), ('platform', PLATFORM_LIST_REDUCED)]) FINN_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list(PYTORCH_LIST_REDUCED)), diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml index ca7fa1dc6..b9c3deffe 100644 --- a/.github/workflows/reduced_examples_llm_pytest.yml +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -18,7 +18,7 @@ jobs: matrix: python_version: ['3.8'] - pytorch_version: ['2.2.2'] + pytorch_version: ['2.4.0'] platform: ['ubuntu-latest'] jit_status: ['jit_disabled'] diff --git a/noxfile.py b/noxfile.py index 8ffdd0c53..17a38789d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -141,8 +141,7 @@ def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) install_torchvision(pytorch, session) # Optimum seems to require torchvision - session.install( - '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') + session.install('-e', '.[test, llm, export]') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') From 
6215aa9353171a4707692196c16d8d8e2a53c5cf Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:49:13 +0100 Subject: [PATCH 42/53] test (ex/llm): Updated test settings across multiple versions --- tests/brevitas_examples/test_llm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b0eae9dcf..86d91ea57 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -24,11 +24,11 @@ def ptid2pathname(string): def allclose(x, y): - return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=1e+01, equal_nan=False) def allveryclose(x, y): - return np.allclose(x, y, rtol=1e-08, atol=1e-01, equal_nan=False) + return np.allclose(x, y, rtol=1e-08, atol=1e+01, equal_nan=False) def allexact(x, y): @@ -133,7 +133,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -178,7 +178,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc @@ -301,7 +301,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args From 3cac63e96b06028a6f23a8baf894ee09c9f060bf Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 17:13:02 +0100 Subject: [PATCH 43/53] test (ex/llm): partitioned tests between pytorch versions. 
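The OPT cases (everything going through `--replace-mha`) only run on PyTorch >= 2.4, while the remaining models work from the optimum-amd floor of 2.2, so the shared parametrizations are split into separate fixtures and `_pt_ge_2_4` test variants rather than gating entire tests at the highest version. For context, a sketch of what a version gate like `requires_pt_ge` could look like; this is an assumption for illustration, the real helper lives in tests/marker.py and may differ:

    import pytest
    import torch
    from packaging import version

    def requires_pt_ge(min_version):
        # skip (rather than fail) when the installed torch predates min_version
        return pytest.mark.skipif(
            version.parse(torch.__version__) < version.parse(min_version),
            reason=f"Requires PyTorch >= {min_version}")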
--- tests/brevitas_examples/test_llm.py | 112 ++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 21 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 86d91ea57..2873debd1 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -24,11 +24,11 @@ def ptid2pathname(string): def allclose(x, y): - return np.allclose(x, y, rtol=1e-04, atol=1e+01, equal_nan=False) + return np.allclose(x, y, rtol=1e-03, atol=1e+01, equal_nan=False) def allveryclose(x, y): - return np.allclose(x, y, rtol=1e-08, atol=1e+01, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=2e+02, equal_nan=False) def allexact(x, y): @@ -74,11 +74,6 @@ class ModelAndPpl: float_ppl=None, supports_fx=True, ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", - float_ppl=None, - supports_fx=True, - ), ModelAndPpl( name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, @@ -133,7 +128,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.4') +@requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -145,6 +140,32 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with float_ppl, quant_ppl, model = main(args) +@pytest.fixture( + scope="session", + params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + supports_fx=True, + ),]) +def small_models_with_ppl_pt_ge_2_4(request): + yield request.param + + +@pytest.mark.llm +@requires_pt_ge('2.4') +def test_small_models_toggle_run_args_pt_ge_2_4( + caplog, toggle_run_args, small_models_with_ppl_pt_ge_2_4): + caplog.set_level(logging.INFO) + args = toggle_run_args + args.model = small_models_with_ppl_pt_ge_2_4.name + exp_float_ppl = small_models_with_ppl_pt_ge_2_4.float_ppl + use_fx = requires_fx(args) + if use_fx and not small_models_with_ppl_pt_ge_2_4.supports_fx: + pytest.xfail(f"{small_models_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) + + @pytest.fixture( params=[ { @@ -158,7 +179,36 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "act_equalization": "fx", "bias_corr": True, "float_ppl": 33239.5, - "quant_ppl": 33283.75390625}, + "quant_ppl": 33283.75390625},]) +def acc_args_and_acc(default_run_args, request): + args = default_run_args + run_dict = request.param + float_ppl = run_dict["float_ppl"] + quant_ppl = run_dict["quant_ppl"] + del run_dict["float_ppl"] + del run_dict["quant_ppl"] + args.update(**run_dict) + yield args, float_ppl, quant_ppl + + +@pytest.mark.llm +@requires_pt_ge('2.2') +def test_small_models_acc(caplog, acc_args_and_acc): + caplog.set_level(logging.INFO) + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + float_ppl, quant_ppl, model = main(args) + float_ppl = float_ppl.detach().cpu().numpy() + quant_ppl = quant_ppl.detach().cpu().numpy() + if config.JIT_ENABLED: + assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + else: + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + + 
+@pytest.fixture( + params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", "weight_equalization": True, @@ -166,7 +216,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "replace_mha": True, "float_ppl": 50016.0, "quant_ppl": 50016.0},]) -def acc_args_and_acc(default_run_args, request): +def acc_args_and_acc_pt_ge_2_4(default_run_args, request): args = default_run_args run_dict = request.param float_ppl = run_dict["float_ppl"] @@ -179,9 +229,9 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.4') -def test_small_models_acc(caplog, acc_args_and_acc): +def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): caplog.set_level(logging.INFO) - args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc_pt_ge_2_4 float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() @@ -278,14 +328,6 @@ def test_small_models_acc(caplog, acc_args_and_acc): "", "model.layers.0.self_attn.q_proj.layer": "",}}, - { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", - "replace_mha": True, - "exp_layer_types": { - "model.decoder.layers.0.self_attn": - "", - "model.decoder.layers.0.self_attn.mha": - "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, @@ -301,7 +343,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.4') +@requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args @@ -309,6 +351,34 @@ def test_small_models_quant_layer(caplog, layer_args): assert_layer_types(model, exp_layer_types) +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "replace_mha": True, + "exp_layer_types": { + "model.decoder.layers.0.self_attn": + "", + "model.decoder.layers.0.self_attn.mha": + "",}},]) +def layer_args_pt_ge_2_4(default_run_args, request): + args = default_run_args + layer_dict = request.param + exp_layer_types = layer_dict["exp_layer_types"] + del layer_dict["exp_layer_types"] + args.update(**layer_dict) + yield args, exp_layer_types + + +@pytest.mark.llm +@requires_pt_ge('2.4') +def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): + caplog.set_level(logging.INFO) + args, exp_layer_types = layer_args_pt_ge_2_4 + float_ppl, quant_ppl, model = main(args) + assert_layer_types(model, exp_layer_types) + + @pytest.fixture( params=[ { From b0d6c63bb1c753828d7fc75985634362170d6458 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 26 Aug 2024 11:13:21 +0100 Subject: [PATCH 44/53] test (ex/llm): Check input_view_impl for MX types --- tests/brevitas_examples/test_llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 2873debd1..9aae02614 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -314,11 +314,11 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): "", "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": - "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.input_view_impl": + "", 
"model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.input_view_impl": "",}}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From f5dbed207cee1335634ea169814a31f437dd24c9 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 16:53:00 +0100 Subject: [PATCH 45/53] test (example/llm): Switched to PyTest cases. Added ids for more readable tests. --- tests/brevitas_examples/test_llm.py | 65 ++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9aae02614..b2c89274f 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -10,6 +10,7 @@ import numpy as np import onnx import pytest +import pytest_cases import torch from brevitas import config @@ -66,8 +67,13 @@ class ModelAndPpl: supports_fx: bool -@pytest.fixture( +@pytest_cases.fixture( scope="session", + ids=[ + "llama", + "mistral", + #"mixtral", + ], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -89,7 +95,7 @@ def small_models_with_ppl(request): yield request.param -@pytest.fixture() +@pytest_cases.fixture() def default_run_args(request): args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 @@ -106,7 +112,16 @@ def default_run_args(request): return args -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "defaults", + "bias_corr=True", + "act_equalization=layerwise", + "act_equalization=fx", + "weight_equalization=True", + "gptq=True", + "ln_affine_merge=True", + ], params=[ {}, { @@ -140,8 +155,11 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with float_ppl, quant_ppl, model = main(args) -@pytest.fixture( +@pytest_cases.fixture( scope="session", + ids=[ + "opt", + ], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", @@ -166,7 +184,11 @@ def test_small_models_toggle_run_args_pt_ge_2_4( float_ppl, quant_ppl, model = main(args) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "llama", + "mistral", + ], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -207,7 +229,10 @@ def test_small_models_acc(caplog, acc_args_and_acc): assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "opt-replace-mha", + ], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -243,7 +268,16 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "mistral-int8", + "mistral-weight-only", + "mistral-fp8_ocp", + "mistral-fp8_fnuz", + "llama-mxfp8", + "llama-int8-act_equalization=layerwise", + "mistral-int8-quant-last-layer", + ], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -351,7 +385,10 @@ def test_small_models_quant_layer(caplog, layer_args): assert_layer_types(model, exp_layer_types) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "opt-replace-mha", + ], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -379,7 +416,11 @@ def test_small_models_quant_layer_pt_ge_2_4(caplog, 
layer_args_pt_ge_2_4): assert_layer_types(model, exp_layer_types) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "qcdq-asym", + "qcdq-sym", + ], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -409,7 +450,11 @@ def test_small_models_onnx_export(caplog, onnx_export_args): shutil.rmtree(args.export_prefix) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "qcdq-asym", + "qcdq-sym", + ], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From c544d5730b7f3daa5961bad9c2493b9c01d091b2 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 16:57:45 +0100 Subject: [PATCH 46/53] test (example/llm): Added comment about Mixtral case --- tests/brevitas_examples/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b2c89274f..26ca4a9a4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -85,7 +85,7 @@ class ModelAndPpl: float_ppl=None, supports_fx=False, ), - #ModelAndPpl( + #ModelAndPpl( # Ready for MoE support # name="dacorvo/Mixtral-tiny", # float_ppl=None, # supports_fx=True, From 08b8b732bb2200d73c859069b4c14e9e92e480b1 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:01:34 +0100 Subject: [PATCH 47/53] pre-commit --- tests/brevitas_examples/test_llm.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 26ca4a9a4..0585b7219 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -71,8 +71,7 @@ class ModelAndPpl: scope="session", ids=[ "llama", - "mistral", - #"mixtral", + "mistral", #"mixtral", ], params=[ ModelAndPpl( @@ -120,8 +119,7 @@ def default_run_args(request): "act_equalization=fx", "weight_equalization=True", "gptq=True", - "ln_affine_merge=True", - ], + "ln_affine_merge=True",], params=[ {}, { @@ -158,8 +156,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with @pytest_cases.fixture( scope="session", ids=[ - "opt", - ], + "opt",], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", @@ -187,8 +184,7 @@ def test_small_models_toggle_run_args_pt_ge_2_4( @pytest_cases.fixture( ids=[ "llama", - "mistral", - ], + "mistral",], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -231,8 +227,7 @@ def test_small_models_acc(caplog, acc_args_and_acc): @pytest_cases.fixture( ids=[ - "opt-replace-mha", - ], + "opt-replace-mha",], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -276,8 +271,7 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): "mistral-fp8_fnuz", "llama-mxfp8", "llama-int8-act_equalization=layerwise", - "mistral-int8-quant-last-layer", - ], + "mistral-int8-quant-last-layer",], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -387,8 +381,7 @@ def test_small_models_quant_layer(caplog, layer_args): @pytest_cases.fixture( ids=[ - "opt-replace-mha", - ], + "opt-replace-mha",], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -419,8 +412,7 @@ def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): @pytest_cases.fixture( ids=[ "qcdq-asym", - "qcdq-sym", - ], + "qcdq-sym",], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -453,8 +445,7 @@ def test_small_models_onnx_export(caplog, 
onnx_export_args): @pytest_cases.fixture( ids=[ "qcdq-asym", - "qcdq-sym", - ], + "qcdq-sym",], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From 6d339908d85bf5df66829fe55a1a4dff32eb7824 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:02:06 +0100 Subject: [PATCH 48/53] test (example/llm): Removed JIT clause for accuracy tests. --- tests/brevitas_examples/test_llm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 0585b7219..b1871045a 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -217,12 +217,8 @@ def test_small_models_acc(caplog, acc_args_and_acc): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - if config.JIT_ENABLED: - assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" - else: - assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest_cases.fixture( From e59287b116e394d7cde73f877e9bad5d784aad35 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:45:36 +0100 Subject: [PATCH 49/53] test (example/test): Removed extra JIT checks. --- tests/brevitas_examples/test_llm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b1871045a..7e786f4a4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -251,12 +251,8 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - if config.JIT_ENABLED: - assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" - else: - assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest_cases.fixture( From 1399d844aab6dc6629a339784cf8d6d235a35ef9 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:48:17 +0100 Subject: [PATCH 50/53] test (example/llm): refactor run tests. 
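The two toggle-run tests carried identical bodies and differed only in which model fixture they consumed, so the shared logic (model selection, the FX xfail check, the call into main) moves into a single `run_test_models_run_args` helper. As a side effect this removes the stale `small_models_with_ppl` reference that PATCH 43 left in the xfail message of the PT>=2.4 variant.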
--- tests/brevitas_examples/test_llm.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 7e786f4a4..55974af86 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -111,6 +111,15 @@ def default_run_args(request): return args +def run_test_models_run_args(args, model_with_ppl): + args.model = model_with_ppl.name + exp_float_ppl = model_with_ppl.float_ppl + use_fx = requires_fx(args) + if use_fx and not model_with_ppl.supports_fx: + pytest.xfail(f"{model_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) + + @pytest_cases.fixture( ids=[ "defaults", @@ -144,13 +153,7 @@ def toggle_run_args(default_run_args, request): @requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) - args = toggle_run_args - args.model = small_models_with_ppl.name - exp_float_ppl = small_models_with_ppl.float_ppl - use_fx = requires_fx(args) - if use_fx and not small_models_with_ppl.supports_fx: - pytest.xfail(f"{small_models_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + run_test_models_run_args(toggle_run_args, small_models_with_ppl) @pytest_cases.fixture( @@ -172,13 +175,7 @@ def small_models_with_ppl_pt_ge_2_4(request): def test_small_models_toggle_run_args_pt_ge_2_4( caplog, toggle_run_args, small_models_with_ppl_pt_ge_2_4): caplog.set_level(logging.INFO) - args = toggle_run_args - args.model = small_models_with_ppl_pt_ge_2_4.name - exp_float_ppl = small_models_with_ppl_pt_ge_2_4.float_ppl - use_fx = requires_fx(args) - if use_fx and not small_models_with_ppl_pt_ge_2_4.supports_fx: - pytest.xfail(f"{small_models_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + run_test_models_run_args(toggle_run_args, small_models_with_ppl_pt_ge_2_4) @pytest_cases.fixture( From 0e762f77a60f8776a544d712edd3b4ed2251463c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 18:03:06 +0100 Subject: [PATCH 51/53] test (example/llm): Added comments about PT versions. 
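The comments record the effective support matrix: the LLM example as a whole needs PyTorch >= 2.2 via its optimum-amd dependency, while the OPT/`--replace-mha` path needs >= 2.4, matching the nox/CI matrix of 2.2.2/2.3.1/2.4.0 introduced in PATCH 32.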
--- tests/brevitas_examples/test_llm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 55974af86..e57bf46cd 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -14,6 +14,7 @@ import torch from brevitas import config +# LLM example depends on optimum-amd, which requires PyTorch>=2.2 from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args from tests.marker import jit_disabled_for_export @@ -162,7 +163,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "opt",], params=[ ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", + name="hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run float_ppl=None, supports_fx=True, ),]) @@ -223,7 +224,7 @@ def test_small_models_acc(caplog, acc_args_and_acc): "opt-replace-mha",], params=[ { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run "weight_equalization": True, "ln_affine_merge": True, "replace_mha": True, @@ -373,7 +374,7 @@ def test_small_models_quant_layer(caplog, layer_args): "opt-replace-mha",], params=[ { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run "replace_mha": True, "exp_layer_types": { "model.decoder.layers.0.self_attn": From 6e9283a87ebb8bc42d8aa281952de3c116cbadf0 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 10 Sep 2024 12:03:13 +0100 Subject: [PATCH 52/53] test (example/llm): Added tests to ensure all args to `main` are also in `parse_args` --- tests/brevitas_examples/test_llm.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index e57bf46cd..8fa406c78 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -37,6 +37,20 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +# Check that all args in args are used +def validate_args(args): + a = vars(args) + da = vars(parse_args([])) + for k in a.keys(): + assert k in da.keys(), f"Key {k} does not seem to be a valid argument for `main`" + + +def validate_args_and_run_main(args): + validate_args(args) + float_ppl, quant_ppl, model = main(args) + return float_ppl, quant_ppl, model + + def assert_layer_types(model, exp_layer_types): for key, string in exp_layer_types.items(): matched = False @@ -118,7 +132,7 @@ def run_test_models_run_args(args, model_with_ppl): use_fx = requires_fx(args) if use_fx and not model_with_ppl.supports_fx: pytest.xfail(f"{model_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) @pytest_cases.fixture( @@ -212,7 +226,7 @@ def acc_args_and_acc(default_run_args, request): def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" @@ -246,7 +260,7 @@ 
def acc_args_and_acc_pt_ge_2_4(default_run_args, request): def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc_pt_ge_2_4 - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" @@ -365,7 +379,7 @@ def layer_args(default_run_args, request): def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) assert_layer_types(model, exp_layer_types) @@ -395,7 +409,7 @@ def layer_args_pt_ge_2_4(default_run_args, request): def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args_pt_ge_2_4 - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) assert_layer_types(model, exp_layer_types) @@ -427,7 +441,7 @@ def onnx_export_args(default_run_args, request): def test_small_models_onnx_export(caplog, onnx_export_args): caplog.set_level(logging.INFO) args = onnx_export_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) onnx_model = onnx.load(os.path.join(args.export_prefix, "model.onnx")) shutil.rmtree(args.export_prefix) @@ -462,7 +476,7 @@ def torch_export_args(default_run_args, request): def test_small_models_torch_export(caplog, torch_export_args): caplog.set_level(logging.INFO) args = torch_export_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) filepath = args.export_prefix + ".pt" torchscript_model = torch.jit.load(filepath) os.remove(filepath) From d07119913135d489302f411f329890966b16ce1b Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 11 Sep 2024 12:26:28 +0100 Subject: [PATCH 53/53] test (example/llm): fixed indentation for `toggle_run_args` --- tests/brevitas_examples/test_llm.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 8fa406c78..6a98911a9 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -135,6 +135,7 @@ def run_test_models_run_args(args, model_with_ppl): float_ppl, quant_ppl, model = validate_args_and_run_main(args) +# yapf: disable @pytest_cases.fixture( ids=[ "defaults", @@ -146,18 +147,13 @@ def run_test_models_run_args(args, model_with_ppl): "ln_affine_merge=True",], params=[ {}, - { - "bias_corr": True}, - { - "act_equalization": "layerwise"}, - { - "act_equalization": "fx"}, - { - "weight_equalization": True}, - { - "gptq": True}, - { - "ln_affine_merge": True},]) + {"bias_corr": True}, + {"act_equalization": "layerwise"}, + {"act_equalization": "fx"}, + {"weight_equalization": True}, + {"gptq": True}, + {"ln_affine_merge": True},]) +# yapf: enable def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param)
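A closing note on the two patches above. The validate_args guard added in
PATCH 52 works because argparse produces a plain Namespace whose attributes
can be enumerated with vars(); comparing a hand-mutated namespace against
vars(parse_args([])) turns a misspelled or stale option into a hard failure
instead of a silently ignored attribute. A minimal self-contained
illustration of the idea, with hypothetical option names:

    import argparse

    def parse_args(argv):
        parser = argparse.ArgumentParser()
        parser.add_argument('--gptq', action='store_true')
        parser.add_argument('--bias-corr', action='store_true')
        return parser.parse_args(argv)

    # Every attribute on the namespace must match a declared argument.
    defaults = vars(parse_args([]))

    def validate(ns):
        for key in vars(ns):
            assert key in defaults, f"Key {key} does not seem to be a valid argument for `main`"

    good = parse_args([])
    good.gptq = True  # matches a declared argument
    validate(good)    # passes

    bad = parse_args([])
    bad.gtpq = True   # typo silently creates a stray attribute
    try:
        validate(bad)
    except AssertionError as exc:
        print(exc)    # Key gtpq does not seem to be a valid argument for `main`

The # yapf: disable / # yapf: enable pair in PATCH 53 uses standard yapf
pragmas: the formatter leaves the region between them untouched, so the
compact one-dict-per-line params list survives future formatting runs.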