From 7fb5b62283a4ae3359c76d6818e7714fd98fabc4 Mon Sep 17 00:00:00 2001
From: Staci Cooper <staci.cooper@automattic.com>
Date: Tue, 15 Oct 2024 16:28:37 -0700
Subject: [PATCH] Update and add tests

---
 .../dags/common/loader/test_reporting.py      | 72 ++++++++++++++++---
 1 file changed, 63 insertions(+), 9 deletions(-)

diff --git a/catalog/tests/dags/common/loader/test_reporting.py b/catalog/tests/dags/common/loader/test_reporting.py
index cbe54f17a07..5f4e068617d 100644
--- a/catalog/tests/dags/common/loader/test_reporting.py
+++ b/catalog/tests/dags/common/loader/test_reporting.py
@@ -7,6 +7,7 @@
     RecordMetrics,
     clean_duration,
     clean_record_counts,
+    detect_missing_records,
     report_completion,
     skip_report_completion,
 )
@@ -38,6 +39,37 @@ def test_report_completion(should_send_message):
         send_message_mock.called = should_send_message
 
 
+@pytest.mark.parametrize(
+    "dated, record_counts_by_media_type, expected_result",
+    [
+        # True if non-dated, and no media types have records upserted
+        (False, {"image": RecordMetrics(0, 0, 0, 0)}, True),
+        (
+            False,
+            {"image": RecordMetrics(0, 0, 0, 0), "audio": RecordMetrics(0, 0, 0, 0)},
+            True,
+        ),
+        # Handles null counts
+        (False, {"image": None}, True),
+        # Handles null upserted counts
+        (False, {"image": RecordMetrics(None, 0, 0, 0)}, True),
+        # False if _any_ media type had records upserted
+        (False, {"image": RecordMetrics(100, 0, 0, 0)}, False),
+        (
+            False,
+            {"image": RecordMetrics(0, 0, 0, 0), "audio": RecordMetrics(100, 0, 0, 0)},
+            False,
+        ),
+        # Always False if DAG is dated
+        (True, {"image": RecordMetrics(100, 0, 0, 0)}, False),
+        (True, {"image": None}, False),
+        (True, {"image": RecordMetrics(None, 0, 0, 0)}, False),
+    ],
+)
+def test_detect_missing_records(dated, record_counts_by_media_type, expected_result):
+    assert detect_missing_records(dated, record_counts_by_media_type) == expected_result
+
+
 def _make_report_completion_contents_data(media_type: str):
     return [
         # Happy path
@@ -158,6 +190,9 @@ def test_report_completion_contents(
     with mock.patch("common.loader.reporting.send_message"):
         record_counts_by_media_type = {**audio_data, **image_data}
         should_skip = skip_report_completion(None, record_counts_by_media_type)
+        should_alert_missing_records = detect_missing_records(
+            dated, record_counts_by_media_type
+        )
         try:
             message = report_completion(
                 "jamendo_workflow",
@@ -171,10 +206,22 @@ def test_report_completion_contents(
         except AirflowSkipException:
             assert should_skip, "AirflowSkipException raised unexpectedly"
             return
+        except ValueError:
+            assert should_alert_missing_records, "ValueError raised unexpectedly"
+            return
+
+        # Assert that if we were supposed to skip or alert missing records, we did not
+        # get this far
+        assert not should_skip, "Completion was reported when it should have skipped."
+        assert (
+            not should_alert_missing_records
+        ), "Completion was reported instead of alerting missing records."
+
         for expected in [audio_expected, image_expected]:
             assert (
                 expected in message
             ), "Completion message doesn't contain expected text"
+
         # Split message into "sections"
         parts = message.strip().split("\n")
         # Get the date section
@@ -216,17 +263,24 @@ def test_report_completion_contents_with_lists(
         record_counts_by_media_type = [
             {**audio, **image} for audio, image in zip(audio_data, image_data)
         ]
-
-        message = report_completion(
-            "Jamendo",
-            ["audio", "image"],
-            None,
-            record_counts_by_media_type,
-            dated,
-            date_range_start,
-            date_range_end,
+        should_alert_missing_records = detect_missing_records(
+            dated, clean_record_counts(record_counts_by_media_type, ["audio", "image"])
         )
 
+        try:
+            message = report_completion(
+                "Jamendo",
+                ["audio", "image"],
+                None,
+                record_counts_by_media_type,
+                dated,
+                date_range_start,
+                date_range_end,
+            )
+        except ValueError:
+            assert should_alert_missing_records, "ValueError raised unexpectedly"
+            return
+
         for expected in [audio_expected, image_expected]:
             assert (
                 expected in message