Commit cf0dd38

add test
1 parent 218e45c commit cf0dd38

File tree

1 file changed: +21 −0 lines changed

tests/pytorch/test_sanity.py

Lines changed: 21 additions & 0 deletions
@@ -1338,3 +1338,24 @@ def backward(ctx, grad_output):
 
     # Assert that gradients are the same
     torch.testing.assert_close(grad_checkpoint, grad_standard)
+
+def test_linear_frozen_weights_memory_default_recipe():
+    """Test that memory usage is optimized when weights are frozen for MXFP8."""
+    dim = 1024
+    linear = Linear(dim, dim, bias=False)
+    x = torch.randn(dim, dim, requires_grad=True, device="cuda")
+
+    # Freeze weights
+    linear.weight.requires_grad = False
+
+    # Forward and backward pass with FP8
+    with fp8_autocast():
+        o = linear(x)
+    g_o = torch.randn_like(o)
+
+    max_memory_before_backward = torch.cuda.max_memory_allocated()
+    o.backward(g_o)
+    max_memory_after_backward = torch.cuda.max_memory_allocated()
+
+    memory_diff = (max_memory_after_backward - max_memory_before_backward) / 1e6
+    assert memory_diff < 5.5, f"Memory usage with frozen weights ({memory_diff}MB) should be less than 5.5MB as the grad_output should be quantized only columnwise."
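For context on where the 5.5 MB bound comes from (a back-of-the-envelope reading of the test, not stated in the commit): with the weight frozen, the wgrad GEMM is skipped, so the backward pass should allocate only the float32 input gradient plus a single column-wise MXFP8 copy of grad_output, rather than both a row-wise and a column-wise copy. A minimal sketch of that arithmetic, assuming 1 byte per MXFP8 element and one 1-byte scaling factor per 32-element block:

# Rough memory estimate for backward of a frozen 1024x1024 Linear under MXFP8.
# Assumptions (not from the commit): float32 dgrad, 1-byte FP8 payloads,
# one scaling factor per 32-element block.
dim = 1024

dgrad_bytes = dim * dim * 4        # float32 input gradient: ~4.19 MB
fp8_copy_bytes = dim * dim * 1     # one quantized copy of grad_output: ~1.05 MB
scale_bytes = dim * dim // 32      # per-block scaling factors: ~0.03 MB

frozen_mb = (dgrad_bytes + fp8_copy_bytes + scale_bytes) / 1e6              # ~5.3 MB
both_copies_mb = (dgrad_bytes + 2 * (fp8_copy_bytes + scale_bytes)) / 1e6   # ~6.4 MB

# The 5.5 MB threshold falls between the two estimates, so the test passes
# only when grad_output is quantized column-wise alone.
print(f"frozen: {frozen_mb:.2f} MB, both copies: {both_copies_mb:.2f} MB")

Under these assumptions the threshold cleanly separates the two behaviors. To run just this test, the usual pytest filter applies: pytest tests/pytorch/test_sanity.py -k test_linear_frozen_weights_memory_default_recipe (requires a CUDA device with MXFP8 support).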

0 commit comments