NVIDIA · pggPL · Jun 3, 2025 · Jun 3, 2025 · Jun 9, 2025 · Jun 9, 2025
diff --git a/tests/pytorch/debug/test_numerics.py b/tests/pytorch/debug/test_numerics.py
@@ -262,6 +262,18 @@ def _get_tensors():
     return x, weight
 
 
+LOGGING_CONFIG = """logging_config:
+  enabled: True
+  layers:
+    layer_types: [linear]
+  transformer_engine:
+    LogTensorStats:
+      enabled: True
+      tensors: [activation, gradient, weight, output, wgrad, dgrad]
+      stats: [min, max, mean, std, l1_norm, l2_norm, cur_amax, dynamic_range]
+"""  # YAML text turning on LogTensorStats for linear layers; written to the config file by run_logging_zero_numel_tensor
+
+
 DISABLE_FP8_CONFIG = Template(
     """disable_fp8_config:
   enabled: True
@@ -275,6 +287,24 @@ def _get_tensors():
 )
 
 
+@create_config_file  # NOTE(review): presumably supplies kwargs["config_file"] and kwargs["log_dir"] - confirm against the decorator
+def run_logging_zero_numel_tensor(feature_dirs, **kwargs):  # smoke run: stats logging with an empty input, then a regular one
+    kwargs["config_file"].write(LOGGING_CONFIG)  # put the LogTensorStats config text into the config file
+    kwargs["config_file"].flush()  # push the text out before the file is handed over by name below
+
+    _init_debug(kwargs["config_file"].name, kwargs["log_dir"], feature_dirs)  # start the debug tooling with that config path and log dir
+
+    x, weight = _get_tensors()  # input and weight from the shared test helper
+    x1 = x[:0, :]  # empty slice along the first dim, so x1 holds zero elements
+    model = _init_model(weight)  # one model instance is reused for both runs
+    _ = _run_forward_backward(x1, model)  # zero-element input first; result is discarded, nothing is asserted
+    _ = _run_forward_backward(x, model)  # then the full input on the same model; result is discarded as well
+
+
+def test_logging_zero_numel_tensor(feature_dirs):  # test entry point: passes when the run below raises nothing
+    run_logging_zero_numel_tensor(feature_dirs)  # only feature_dirs is passed here; the function's other kwargs are not given at this call site
+
+
 @pytest.mark.parametrize("fprop_fp8", all_boolean)
 @pytest.mark.parametrize("dgrad_fp8", all_boolean)
 @pytest.mark.parametrize("wgrad_fp8", all_boolean)

diff --git a/transformer_engine/debug/features/utils/stats_buffer.py b/transformer_engine/debug/features/utils/stats_buffer.py
@@ -85,6 +85,13 @@ def feed(self, tensor, iteration):
         if self.modified[0] and not self.reduce_within_microbatch:
             return
 
+        if (
+            tensor.numel() == 0
+            if hasattr(tensor, "numel")
+            else all((t is None or t.numel() == 0) for t in tensor.get_data_tensors())
+        ):
+            return
+
         # save stats for tensor to tmp buffer
         for stat_name in self.stats_to_compute:
             fn, _ = STATS[stat_name]

diff --git a/transformer_engine/debug/features/utils/stats_computation.py b/transformer_engine/debug/features/utils/stats_computation.py
@@ -17,6 +17,8 @@ def _compute_dynamic_range_top(tensor):
     """Computes the log2 of the amax of the tensor"""
     tensor_abs = tensor.abs()
     tensor_abs = tensor_abs[tensor_abs != 0]
+    if tensor_abs.numel() == 0:
+        return torch.inf
     amax = tensor_abs.max().float()
     if not amax.all():
         amax = torch.tensor(1, device=tensor.device).to(torch.float)
@@ -125,7 +127,7 @@ def _get(buffers, stat_name):
         lambda buffers: min(_get(buffers, "dynamic_range_bottom")),
     ),
     "underflows_num": (
-        lambda x: (x._data == 0).sum(),
+        lambda x: (x.get_data_tensors()[0] == 0).sum(),
         lambda buffers: sum(_get(buffers, "underflows_num")),
     ),
     "std": (