moj-analytical-services · sama-ds · Nov 8, 2023 · Jun 16, 2023 · Jun 16, 2023 · Jun 16, 2023
diff --git a/splink/profile_data.py b/splink/profile_data.py
@@ -1,9 +1,12 @@
+import logging
 import re
 from copy import deepcopy
 
 from .charts import altair_or_json, load_chart_definition
 from .misc import ensure_is_list
 
+logger = logging.getLogger(__name__)
+
 
 def _group_name(cols_or_expr):
     cols_or_expr = re.sub(r"[^0-9a-zA-Z_]", " ", cols_or_expr)
@@ -270,21 +273,35 @@ def profile_columns(linker, column_expressions=None, top_n=10, bottom_n=10):
         percentile_rows = [
             p for p in percentile_rows_all if p["group_name"] == _group_name(expression)
         ]
-        percentile_rows = _add_100_percentile_to_df_percentiles(percentile_rows)
-        top_n_rows = [
-            p for p in top_n_rows_all if p["group_name"] == _group_name(expression)
-        ]
-        bottom_n_rows = [
-            p for p in bottom_n_rows_all if p["group_name"] == _group_name(expression)
-        ]
-        # remove concat blank from expression title
-        expression = expression.replace(", ' '", "")
-        inner_chart = _get_inner_chart_spec_freq(
-            percentile_rows, top_n_rows, bottom_n_rows, expression
-        )
-        inner_charts.append(inner_chart)
-    outer_spec = deepcopy(_outer_chart_spec_freq)
-
-    outer_spec["vconcat"] = inner_charts
-
-    return altair_or_json(outer_spec)
+        if percentile_rows == []:
+            logger.warning(
+                "Warning: No charts produced for "
+                f"{expression}"
+                " as the column only contains null values."
+            )
+        else:
+            percentile_rows = _add_100_percentile_to_df_percentiles(percentile_rows)
+            top_n_rows = [
+                p for p in top_n_rows_all if p["group_name"] == _group_name(expression)
+            ]
+            bottom_n_rows = [
+                p
+                for p in bottom_n_rows_all
+                if p["group_name"] == _group_name(expression)
+            ]
+            # remove concat blank from expression title
+            expression = expression.replace(", ' '", "")
+            inner_chart = _get_inner_chart_spec_freq(
+                percentile_rows, top_n_rows, bottom_n_rows, expression
+            )
+            inner_charts.append(inner_chart)
+
+    if inner_charts != []:
+
+        outer_spec = deepcopy(_outer_chart_spec_freq)
+        outer_spec["vconcat"] = inner_charts
+
+        return altair_or_json(outer_spec)
+
+    else:
+        return None
diff --git a/tests/test_profile_data.py b/tests/test_profile_data.py
@@ -176,3 +176,23 @@ def test_profile_using_spark(df_spark):
     )
 
     assert len(generate_raw_profile_dataset([["first_name", "blank"]], linker)) == 0
+
+
+def test_profile_null_columns(caplog):
+
+    df = pd.DataFrame(
+        [
+            {"unique_id": 1, "test_1": 1, "test_2": None},
+        ]
+    )
+
+    linker = DuckDBLinker(df)
+
+    linker.profile_columns(["test_1", "test_2"])
+
+    captured_logs = caplog.text
+
+    assert (
+        "Warning: No charts produced for test_2 as the column only contains null values."
+        in captured_logs
+    )