diff --git a/src/brevitas/graph/equalize.py b/src/brevitas/graph/equalize.py
index 8b470091d..025abddd7 100644
--- a/src/brevitas/graph/equalize.py
+++ b/src/brevitas/graph/equalize.py
@@ -18,6 +18,7 @@
 from brevitas.fx import Node
 from brevitas.graph.base import GraphTransform
 from brevitas.graph.base import ModuleInstanceToModuleInstance
+from brevitas.graph.hadamard import get_hadK, matmul_hadU, matmul_hadU_cuda
 from brevitas.graph.utils import get_module
 from brevitas.graph.utils import get_node
 from brevitas.nn.equalized_layer import EqualizedModule, RotatedModule
@@ -25,12 +26,8 @@
 from brevitas.nn.quant_scale_bias import ScaleBias
 from brevitas.utils.torch_utils import KwargsForwardHook
 
-from .base import GraphTransform
 from .base import InsertModuleCallAfter
 
-try:
-    from scipy.linalg import hadamard
-except ImportError:
-    hadamard = None
+
 
 __all__ = ['GraphActivationEqualization', 'LayerwiseActivationEqualization', 'EqualizeGraph']
 
 EPSILON = 1e-9
@@ -680,7 +677,7 @@ def _is_scale_varying_activation(graph_model, node):
 
 def _is_scale_invariant_function(node: Node, scale_invariant_op: Set = _scale_invariant_op) -> bool:
-    out = node.op == 'call_function' and node.target in scale_invariant_op + _select_op + _reshaping_op
+    out = node.op in ('call_function', 'call_method') and node.target in scale_invariant_op + _select_op + _reshaping_op
     if node.target == torch.nn.functional.interpolate:
         out &= node.kwargs.get('mode', None) == 'nearest'
     return out
@@ -1205,55 +1202,75 @@ def insert_mul_node(self, scale, shape, axis, act_node, batch_dim=0):
         rewriter = InsertModuleCallAfter(mul_factor_name, act_node)
         rewriter.apply(self.model)
 
+
+def _apply_had_device(tensor, had_K, K):
+    # Accelerated kernel is only available for CUDA
+    is_cuda = 'cuda' in str(tensor.device) and torch.version.cuda is not None
+    if is_cuda:
+        return matmul_hadU_cuda(tensor, had_K, K)
+    else:
+        return matmul_hadU(tensor)
+
 
 def _apply_rotate(model: nn.Module, regions: List[Region], insert_rotation_func: bool = False):
     for region in regions:
-        for name in (region.srcs_names + region.sinks_names):
-            module = region.get_module_from_name(name)
-            if hasattr(module, 'allocate_params'):
-                module.allocate_params(module)
         if not insert_rotation_func and not region.is_valid:
             continue
         hidden_dim = region.max_shape_sinks
-        # Check that hidden_dim is an exact Po2
-        if torch.log2(torch.tensor(hidden_dim)) != torch.ceil(torch.log2(torch.tensor(hidden_dim))):
-            continue
-        # Build hadamard rotation matrix
-        h = torch.from_numpy(hadamard(hidden_dim)) / torch.sqrt(torch.tensor(hidden_dim))
-        hadamard_inverse = h.t()
+        try:
+            # Build hadamard rotation matrix
+            had_K, K = get_hadK(hidden_dim)
+        except AssertionError as e:
+            print("Incompatible shapes")
+            raise e
+
         for name, indexes in region.srcs.items():
             module = region.get_module_from_name(name)
+            if hasattr(module, 'allocate_params'):
+                module.allocate_params(module)
             axis = _get_output_axis(module)
-            h_inv = hadamard_inverse.type_as(module.weight.data)
+            weight = module.weight.data
+
             if axis == 0:
-                module.weight.data = torch.matmul(h_inv, module.weight.data)
+                weight = _apply_had_device(weight.t(), had_K, K).t()
             elif axis == 1:
-                module.weight.data = torch.matmul(module.weight.data, h_inv)
+                weight = _apply_had_device(weight, had_K, K)
             else:
                 raise RuntimeError("Not supported yet")
+            module.weight.data = weight
+
+            if getattr(module, 'bias', None) is not None:
+                bias = module.bias.data
+                bias = _apply_had_device(bias, had_K, K)
+                module.bias.data = bias
+            if hasattr(module, 'offload_params'):
+                module.offload_params(module)
 
         for name, indexes in region.sinks.items():
             module = region.get_module_from_name(name)
-            h = h.type_as(module.weight.data)
+            if hasattr(module, 'allocate_params'):
+                module.allocate_params(module)
             axis = _get_input_axis(module)
+            weight = module.weight.data
+
             if axis == 1:
-                module.weight.data = torch.matmul(module.weight.data, h)
+                weight = _apply_had_device(weight, had_K, K)
             elif axis == 0:
-                module.weight.data = torch.matmul(h, module.weight.data)
+                weight = _apply_had_device(weight.t(), had_K, K).t()
             else:
                 raise RuntimeError("Not supported yet")
-            if insert_rotation_func and len(region.srcs) == 0:
-                rewriter = ModuleInstanceToModuleInstance(
-                    module, RotatedModule(h_inv=hadamard_inverse, layer=module))
-                rewriter.apply(model)
-        for name in (region.srcs_names + region.sinks_names):
-            module = region.get_module_from_name(name)
+            module.weight.data = weight
             if hasattr(module, 'offload_params'):
                 module.offload_params(module)
+            if insert_rotation_func and len(region.srcs) == 0:
+                rewriter = ModuleInstanceToModuleInstance(
+                    module, RotatedModule(had_mat=had_K, k=K, layer=module))
+                rewriter.apply(model)
+
 
 class GraphRotationEqualization(GraphTransform):
 
     def __init__(self) -> None:
@@ -1266,9 +1283,8 @@ def __init__(self) -> None:
 
     def apply(self,
              graph_model: GraphModule) -> Union[Tuple[GraphModule, Set[Tuple[str]]], GraphModule]:
-        # It is not possible to equalize through LayerNorm/BatchNorm as sink
-        regions = _extract_regions(graph_model, state_impl_kwargs={'supported_srcs': self.supported_srcs, 'supported_sinks': self.supported_sinks, 'scale_invariant_layers': self.scale_invariant_layers})
+        regions = _extract_regions(graph_model, state_impl_kwargs={'supported_srcs': self.supported_srcs, 'supported_sinks': self.supported_sinks, 'scale_invariant_layers': self.scale_invariant_layers, 'scale_invariant_function': self.scale_invariant_function})
         if len(regions) > 0:
             _apply_rotate(graph_model, regions, False)
@@ -1291,6 +1307,7 @@ def _merge_ln(layer_norm, next_module, scale_bias_by_weight):
     # We can't do an inplace update as some layers we merge into like lm_head might share the weight tensor
     scale = layer_norm.weight.data.view(view_shape).expand_as(next_module.weight)
     next_module.weight = torch.nn.Parameter(next_module.weight.clone() * scale)
+    # Merge bias, new_bias includes the bias of next_module by going through its fwd
     if hasattr(layer_norm, 'bias'):
         inp = layer_norm.bias.data.view(view_shape)
@@ -1307,6 +1324,7 @@ def __init__(self) -> None:
 
     def apply(self, graph_model: GraphModule) -> GraphModule:
         regions = _extract_regions(graph_model, state_impl_kwargs={'supported_srcs': self.supported_srcs, 'supported_sinks': self.supported_sinks})
+
         if len(regions) > 0:
             scaled_biases = set()
             for region in regions:
@@ -1314,7 +1332,6 @@ def apply(self, graph_model: GraphModule) -> GraphModule:
                 layernorm_module = region.get_module_from_name(layernorm_module_name)
                 if not layernorm_module.elementwise_affine:
                     continue
-
                 for name, indexes in region.sinks.items():
                     module = region.get_module_from_name(name)
                     scale_bias = id(module) not in scaled_biases
@@ -1327,7 +1344,7 @@ def apply(self, graph_model: GraphModule) -> GraphModule:
         return graph_model
 
 
-class LayerwiseActivationEqualization(GraphTransform):
+class LayerwiseActivationRotation(GraphTransform):
 
     def __init__(self, blacklist_layer=None):
         super(GraphTransform, self).__init__()
diff --git a/src/brevitas/nn/equalized_layer.py b/src/brevitas/nn/equalized_layer.py
index 7620fbeab..a2339beef 100644
--- a/src/brevitas/nn/equalized_layer.py
+++ b/src/brevitas/nn/equalized_layer.py
@@ -3,7 +3,7 @@
 import torch
 
 from brevitas.nn.quant_mha import QuantMultiheadAttention
-from brevitas.quant_tensor.base_quant_tensor import QuantTensor
+import fast_hadamard_transform
 
 INPUT_NAMES = ['input', 'inp', 'query', 'x', 'hidden_states']
 
@@ -45,15 +45,27 @@ def forward(self, *args, **kwargs):
 
 class RotatedModule(torch.nn.Module):
 
-    def __init__(self, h_inv, layer) -> None:
+    def __init__(self, had_mat, k, layer) -> None:
         super().__init__()
-        self.h_inv = torch.nn.Parameter(h_inv)
+        # Keep the Hadamard matrix on CPU, move it to the input device per call
+        self.had_mat = torch.nn.Parameter(had_mat).cpu()
         self.layer = layer
+        self.k = k
+
+    def forward(self, inp, **kwargs):
+        shape = inp.shape
+        n = inp.shape[-1]
+        if self.k == 1:
+            inp = fast_hadamard_transform.hadamard_transform(
+                inp.contiguous(), 1.0 / torch.tensor(n).sqrt())
+        else:
+            inp = inp.view(*inp.shape[:-1], self.k, n // self.k)
+            inp = fast_hadamard_transform.hadamard_transform(
+                inp.contiguous(), 1.0 / torch.tensor(n).sqrt())
+            inp = self.had_mat.to(inp.device).to(inp.dtype) @ inp
+            inp = inp.reshape(shape)
+        o = self.layer(inp)
+        return o
 
-    def forward(self, *args, **kwargs):
-        inp = args[0]
-        if isinstance(inp, QuantTensor):
-            inp = inp.value
-        inp = torch.matmul(inp, self.h_inv)
-        out = self.layer(inp)
-        return out
diff --git a/src/brevitas_examples/llm/llm_quant/ln_affine_merge.py b/src/brevitas_examples/llm/llm_quant/ln_affine_merge.py
index 7ac39347f..14653016f 100644
--- a/src/brevitas_examples/llm/llm_quant/ln_affine_merge.py
+++ b/src/brevitas_examples/llm/llm_quant/ln_affine_merge.py
@@ -6,11 +6,21 @@
 import torch
 from torch import nn
 
-from brevitas.graph.equalize import _is_reshaping_op
-from brevitas.graph.equalize import _is_scale_invariant_module
+from brevitas.graph.base import ModuleToModuleByClass
+from brevitas.graph.equalize import MergeLnAffine, _is_scale_invariant_module
 from brevitas.graph.utils import get_module
 from brevitas_examples.llm.llm_quant.run_utils import cast_to_float32
 
+
+def replace_rmsnorm_with_torch(model, config):
+    from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
+    ALL_RMSNORM_LAYERS = [x for x in ALL_LAYERNORM_LAYERS if 'RMS' in x.__name__]
+    rewriters = [
+        ModuleToModuleByClass(
+            rms_cls, torch.nn.RMSNorm, normalized_shape=config.hidden_size, eps=config.rms_norm_eps)
+        for rms_cls in ALL_RMSNORM_LAYERS]
+    dtype = next(iter(model.parameters())).dtype
+    for r in rewriters:
+        model = r.apply(model)
+    model = model.to(dtype)
+    return model
+
 
 def replace_bias(next_module, new_bias):
     new_bias = new_bias.view(-1)
@@ -49,7 +59,7 @@ def merge_layernorm_affine_params(graph_model):
             module = get_module(graph_model, node.target)
             if isinstance(module, nn.LayerNorm):
                 for next in node.users:
-                    while (_is_reshaping_op(next) or _is_scale_invariant_module(graph_model, next)):
+                    while _is_scale_invariant_module(graph_model, next):
                         next = node.next
                     if next.op == 'call_module':
                         next_module = get_module(graph_model, next.target)
@@ -83,8 +93,7 @@ def merge_layernorm_affine_params(graph_model):
 
 @torch.no_grad()
-def apply_layernorm_affine_merge(graph_model, dtype):
-    # We can't do fp16 tracing on CPU as many kernels are not implemented
-    # So we have to cast to fp32 first, trace, apply merging, and then cast back
-    with cast_to_float32(graph_model, dtype):
-        merge_layernorm_affine_params(graph_model)
+def apply_layernorm_affine_merge(graph_model):
+    eq = MergeLnAffine()
+    graph_model = eq.apply(graph_model)
+    return graph_model
diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py
index c33de54c8..26572249c 100644
--- a/src/brevitas_examples/llm/main.py
+++ b/src/brevitas_examples/llm/main.py
@@ -8,6 +8,7 @@
 from warnings import warn
 
 import numpy as np
+from brevitas.graph.equalize import GraphRotationEqualization, LayerwiseActivationRotation
 from optimum.amd.brevitas.accelerate_utils import offload_model
 from optimum.amd.brevitas.accelerate_utils import remove_hooks
 from optimum.amd.brevitas.data_utils import compute_perplexity
@@ -31,7 +32,7 @@
 from brevitas_examples.llm.llm_quant.export import brevitas_proxy_export_mode
 from brevitas_examples.llm.llm_quant.gpxq import apply_gpfq
 from brevitas_examples.llm.llm_quant.gpxq import apply_gptq
-from brevitas_examples.llm.llm_quant.ln_affine_merge import apply_layernorm_affine_merge
+from brevitas_examples.llm.llm_quant.ln_affine_merge import apply_layernorm_affine_merge, replace_rmsnorm_with_torch
 from brevitas_examples.llm.llm_quant.prepare_for_quantize import add_zero_bias_to_linear
 from brevitas_examples.llm.llm_quant.prepare_for_quantize import replace_mha_with_quantizable_layers
 from brevitas_examples.llm.llm_quant.run_utils import CastFloat16ToFloat32
@@ -177,25 +178,39 @@ def main(args):
     device = next(iter(model.parameters())).device
     print("Data loaded.")
 
     if args.eval:
         assert args.export_target != 'torch_qcdq', "TorchScript QCDQ export and Evaluation simultaneously"
         print("Float model eval...")
         model = offload_model(model)
         float_ppl = compute_perplexity(
             model, validation_loader, context_length=args.seqlen // 2, tokenizer=tokenizer)
         remove_hooks(model)
         print(f"Float perplexity ({args.dataset}): {float_ppl:.3f}")
 
+    if args.replace_rmsnorm:
+        model = replace_rmsnorm_with_torch(model, model.config)
+
     if require_fx:
-        model = get_fx(model)
+        with torch.no_grad():
+            model, guards = torch._dynamo.export(model)(**calibration_loader[0])
 
     # Apply LN affine merging before inserting MHA layers
     # since currently there is support only for merging into Linear
     if args.ln_affine_merge:
         print("Apply LN affine merge...")
-        apply_layernorm_affine_merge(model, dtype)
+        model = apply_layernorm_affine_merge(model)
         print("LN affine merge applied.")
 
+    if args.graph_rotation:
+        assert args.ln_affine_merge, 'Graph rotation requires LN affine merge'
+        assert args.replace_rmsnorm, 'Graph rotation requires torch RMSNorm'
+        eq = GraphRotationEqualization()
+        model = eq.apply(model)
+    elif args.layerwise_rotation:
+        eq = LayerwiseActivationRotation()
+        model = eq.apply(model)
+
     # Insert standard MHA layers when performing fx based weight/act equalization to avoid dealing
     # with all the variability in HF implementations
@@ -453,6 +468,7 @@ def parse_args(args):
         '--act-calibration', action='store_true', help='Apply activation calibration.')
     parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.')
     parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.')
+    parser.add_argument(
+        '--replace-rmsnorm', action='store_true', help='Replace HF RMSNorms with Torch ones.')
     parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.')
     parser.add_argument(
         '--no-float16',
@@ -466,6 +482,15 @@ def parse_args(args):
         '--weight-equalization',
         action='store_true',
         help='Apply weight equalization. Relevant to ReLU based models (e.g. OPT).')
+    parser.add_argument(
+        '--graph-rotation',
+        action='store_true',
+        help='Apply graph rotation equalization.')
+    parser.add_argument(
+        '--layerwise-rotation',
+        action='store_true',
+        help='Apply layerwise rotation equalization.')
     parser.add_argument(
         '--act-equalization',
         default=None,
diff --git a/tests/brevitas/graph/equalization_fixtures.py b/tests/brevitas/graph/equalization_fixtures.py
index 2719b48a0..cfba3ba30 100644
--- a/tests/brevitas/graph/equalization_fixtures.py
+++ b/tests/brevitas/graph/equalization_fixtures.py
@@ -491,3 +491,36 @@ def forward(self, x):
 
 toy_quant_model = fixture_union(
     'toy_quant_model', list_of_quant_fixtures, ids=list_of_quant_fixtures)
+
+
+## List of rotation fixtures
+
+
+@pytest_cases.fixture
+def linear_rms():
+
+    class LinearRMSModel(nn.Module):
+
+        def __init__(self) -> None:
+            super().__init__()
+            self.linear = nn.Linear(3, 4, bias=True)
+            self.linear.weight.data.fill_(2.)
+            self.linear.bias.data.fill_(1.)
+            self.rms = nn.RMSNorm(4)
+            self.rms.weight.data = torch.randn_like(self.rms.weight.data)  # Change learned parameters
+            self.linear_1 = nn.Linear(4, 8, bias=True)
+            self.linear_1.weight.data.fill_(2.)
+
+        def forward(self, x):
+            x = self.linear(x)
+            x = self.rms(x)
+            x = self.linear_1(x)
+            return x
+
+    return LinearRMSModel
+
+
+list_of_rotation_fixtures = ['linear_rms']
+
+rotation_fixtures = fixture_union(
+    'rotation_fixtures', list_of_rotation_fixtures, ids=list_of_rotation_fixtures)
diff --git a/tests/brevitas/graph/test_equalization.py b/tests/brevitas/graph/test_equalization.py
index 89759b41a..d5e963405 100644
--- a/tests/brevitas/graph/test_equalization.py
+++ b/tests/brevitas/graph/test_equalization.py
@@ -7,7 +7,7 @@
 from torchvision import models
 
 from brevitas.fx import symbolic_trace
-from brevitas.graph.equalize import _batch_norm
+from brevitas.graph.equalize import GraphRotationEqualization, MergeLnAffine, _batch_norm
 from brevitas.graph.equalize import _extract_regions
 from brevitas.graph.equalize import _is_supported_module
 from brevitas.graph.equalize import activation_equalization_mode
@@ -18,210 +18,241 @@
 
 from .equalization_fixtures import *
 
-def test_resnet18_equalization():
-    model = models.resnet18(pretrained=True)
-
-    torch.manual_seed(SEED)
-    inp = torch.randn(IN_SIZE_CONV)
-    model.eval()
-    model = symbolic_trace(model)
-    expected_out = model(inp)
-
-    model_orig = copy.deepcopy(model)
-    regions = _extract_regions(model)
-    _ = equalize_test(
-        regions, merge_bias=True, bias_shrinkage='vaiq', scale_computation_type='maxabs')
-    out = model(inp)
-
-    # Check that equalization is not introducing FP variations
-    assert torch.allclose(expected_out, out, atol=ATOL)
-
-    regions = sorted(regions, key=lambda region: sorted([r for r in region.srcs_names]))
-    resnet_18_regions = sorted(RESNET_18_REGIONS, key=lambda region: region[0][0])
-    equalized_layers = set()
-    for r in resnet_18_regions:
-        equalized_layers.update(r[0])
-        equalized_layers.update(r[1])
-
-    # Check that we found all the expected regions
-    for region, expected_region in zip(regions, resnet_18_regions):
-        srcs = region.srcs_names
-        sources_check = set(srcs) == set(expected_region[0])
-        sinks = region.sinks_names
-        sinks_check = set(sinks) == set(expected_region[1])
-        assert sources_check
-        assert sinks_check
-
-    # Check that all layers were equalized and weights changed
-    for layer in equalized_layers:
-        eq_module = get_module(model, layer)
-        orig_module = get_module(model_orig, layer)
-        assert not torch.allclose(eq_module.weight, orig_module.weight)
-
-
-@pytest_cases.parametrize("merge_bias", [True, False])
-def test_equalization_torchvision_models(model_coverage: tuple, merge_bias: bool):
-    model, coverage = model_coverage
-
-    torch.manual_seed(SEED)
-    inp = torch.randn(IN_SIZE_CONV)
-    model.eval()
-    # The isistance does not work after symbolic trace
-    is_alexnet = isinstance(model, models.AlexNet)
-    model = symbolic_trace(model)
-    model = TorchFunctionalToModule().apply(model)
-
-    expected_out = model(inp)
-
-    regions = _extract_regions(model)
-    scale_factor_regions = equalize_test(
-        regions, merge_bias=merge_bias, bias_shrinkage='vaiq', scale_computation_type='maxabs')
-    shape_scale_regions = [scale.shape for scale in scale_factor_regions]
-
-    out = model(inp)
-    srcs = set()
-    sinks = set()
-    for r in regions:
-        srcs.update([x for x in list(r.srcs_names)])
-        sinks.update([x for x in list(r.sinks_names)])
-
-    count_region_srcs = 0
-    count_region_sinks = 0
-    for n in model.graph.nodes:
-        if _is_supported_module(model, n):
-            count_region_srcs += 1
-            if not isinstance(get_module(model, n.target), (nn.LayerNorm,) + _batch_norm):
-                count_region_sinks += 1
-
-    src_coverage = len(srcs) / count_region_srcs
-    sink_coverage = len(sinks) / count_region_sinks
-    assert src_coverage >= coverage[0]
-    assert sink_coverage >= coverage[1]
-    assert torch.allclose(expected_out, out, atol=ATOL)
-    # Graph equalization can exit in case of shape mismatches or other error without performing any
-    # equalization and returning a scalar value. We check that the equalized regions are as many as
-    # expected
-    if is_alexnet:
-        # In AlexNet, we cannot equalize only through one region
-        assert sum([shape == () for shape in shape_scale_regions]) == 1
-    else:
-        assert all([shape != () for shape in shape_scale_regions])
-
-
-@pytest_cases.parametrize("merge_bias", [True, False])
-def test_models(toy_model, merge_bias, request):
-    test_id = request.node.callspec.id
-
-    if 'mha' in test_id:
-        in_shape = IN_SIZE_LINEAR
-    else:
-        in_shape = IN_SIZE_CONV
-
-    model_class = toy_model
-    model = model_class()
-    inp = torch.randn(in_shape)
+
+
+def test_models(rotation_fixtures):
+
+    in_shape = IN_SIZE_LINEAR
+
+    model_class = rotation_fixtures
+    model = model_class().cuda()
+    inp = torch.ones(in_shape).cuda()
 
     model.eval()
+    weight = copy.deepcopy(list(model.parameters())[-2])
     with torch.no_grad():
         expected_out = model(inp)
 
     model = symbolic_trace(model)
-    regions = _extract_regions(model)
-    scale_factor_regions = equalize_test(
-        regions, merge_bias=merge_bias, bias_shrinkage='vaiq', scale_computation_type='maxabs')
-    shape_scale_regions = [scale.shape for scale in scale_factor_regions]
+    merge = MergeLnAffine()
+    model = merge.apply(model)
+    eq = GraphRotationEqualization()
+    model = eq.apply(model)
 
     with torch.no_grad():
         out = model(inp)
-    assert len(regions) > 0
-    assert torch.allclose(expected_out, out, atol=ATOL)
-    # Check that at least one region performs "true" equalization
-    # If all shapes are scalar, no equalization has been performed
-    if 'convgroupconv' in test_id:
-        with pytest.raises(AssertionError):
-            assert all([shape != () for shape in shape_scale_regions])
-    else:
-        assert all([shape != () for shape in shape_scale_regions])
-
-
-@pytest_cases.parametrize("layerwise", [True, False])
-def test_act_equalization_models(toy_model, layerwise, request):
-    test_id = request.node.callspec.id
-
-    if 'mha' in test_id:
-        in_shape = IN_SIZE_LINEAR
-    else:
-        in_shape = IN_SIZE_CONV
-
-    model_class = toy_model
-    model = model_class()
-    inp = torch.randn(in_shape)
-
-    model.eval()
-    expected_out = model(inp)
-    model = symbolic_trace(model)
-    with torch.no_grad():
-        with activation_equalization_mode(model, 0.5, True, layerwise=layerwise) as aem:
-            regions = aem.graph_act_eq.regions
-            model(inp)
-    scale_factor_regions = aem.scale_factors
-    shape_scale_regions = [scale.shape for scale in scale_factor_regions]
-
-    out = model(inp)
-    assert torch.allclose(expected_out, out, atol=ATOL)
-
-    # This region is made up of a residual branch, so no regions are found for act equalization
-    if 'convgroupconv' in test_id:
-        with pytest.raises(AssertionError):
-            assert len(regions) > 0
-            # Check that at least one region performs "true" equalization
-            # If all shapes are scalar, no equalization has been performed
-            assert all([shape != () for shape in shape_scale_regions])
-    else:
-        assert len(regions) > 0
-        # Check that at least one region performs "true" equalization
-        # If all shapes are scalar, no equalization has been performed
-        assert all([shape != () for shape in shape_scale_regions])
-
-
-@pytest_cases.parametrize(
-    "model_dict", [(model_name, coverage) for model_name, coverage in MODELS.items()],
-    ids=[model_name for model_name, _ in MODELS.items()])
-@pytest_cases.parametrize("layerwise", [True, False])
-def test_act_equalization_torchvision_models(model_dict: dict, layerwise: bool):
-    model, coverage = model_dict
-
-    if model == 'googlenet' and torch_version == version.parse('1.8.1'):
-        pytest.skip(
-            'Skip because of PyTorch error = AttributeError: \'function\' object has no attribute \'GoogLeNetOutputs\' '
-        )
-    if 'vit' in model and torch_version < version.parse('1.13'):
-        pytest.skip(
-            f'ViT supported from torch version 1.13, current torch version is {torch_version}')
-
-    try:
-        model = getattr(models, model)(pretrained=True, transform_input=False)
-    except TypeError:
-        model = getattr(models, model)(pretrained=True)
-
-    torch.manual_seed(SEED)
-    inp = torch.randn(IN_SIZE_CONV)
-    model.eval()
-
-    model = symbolic_trace(model)
-    model = TorchFunctionalToModule().apply(model)
-    model = DuplicateSharedStatelessModule().apply(model)
-    expected_out = model(inp)
-
-    with torch.no_grad():
-        with activation_equalization_mode(model, 0.5, True, layerwise=layerwise) as aem:
-            model(inp)
-    scale_factor_regions = aem.scale_factors
-    shape_scale_regions = [scale.shape for scale in scale_factor_regions]
-
-    out = model(inp)
-    assert torch.allclose(expected_out, out, atol=ATOL)
-    # Check that at least one region performs "true" equalization
-    # If all shapes are scalar, no equalization has been performed
-    assert any([shape != () for shape in shape_scale_regions])
+    nweight = copy.deepcopy(list(model.parameters())[-2])
+    # Invariance of the output
+    assert torch.allclose(out, expected_out, atol=ATOL)
+    # Rotated weights must be different
+    assert not torch.allclose(weight, nweight)
+    # Affine parameters of the RMSNorm have been merged away
+    assert torch.allclose(model.rms.weight.data, torch.ones_like(model.rms.weight.data))
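
Usage sketch (illustrative, not part of the patch): the minimal flow the new test exercises, in the order main.py enforces (torch RMSNorm in place, trace, merge LN/RMS affine params, rotate). The toy model, shapes, and tolerance below are assumptions for demonstration; nn.RMSNorm requires torch >= 2.4, and the accelerated Hadamard path additionally needs CUDA plus the fast_hadamard_transform package.

    import torch
    import torch.nn as nn

    from brevitas.fx import symbolic_trace
    from brevitas.graph.equalize import GraphRotationEqualization, MergeLnAffine

    # Toy stand-in for an LLM block: Linear -> RMSNorm -> Linear (hidden dim 8 is a power of 2)
    model = nn.Sequential(nn.Linear(8, 8), nn.RMSNorm(8), nn.Linear(8, 8)).eval()
    # Non-trivial affine params, mirroring the linear_rms fixture
    model[1].weight.data.copy_(torch.randn_like(model[1].weight))

    inp = torch.randn(1, 8)
    with torch.no_grad():
        expected = model(inp)

    graph_model = symbolic_trace(model)
    graph_model = MergeLnAffine().apply(graph_model)  # fold RMSNorm affine params into the next Linear
    graph_model = GraphRotationEqualization().apply(graph_model)  # rotate srcs/sinks with a Hadamard matrix

    with torch.no_grad():
        out = graph_model(inp)
    # The rotation is orthogonal, so the output should be numerically (near) unchanged
    assert torch.allclose(out, expected, atol=1e-5)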