From d98619a480e9800637b861fd0ac84cab8b1184b4 Mon Sep 17 00:00:00 2001
From: aaronfriedman <aaronfriedman6@gmail.com>
Date: Tue, 17 Dec 2024 13:39:13 -0500
Subject: [PATCH] Add DailyLocationVisitsAlarms

---
 .github/workflows/run-tests.yml               |   6 +-
 CHANGELOG.md                                  |   4 +
 README.md                                     |   3 +
 alarm_controller.py                           |   2 +
 alarms/models/daily_location_visits_alarms.py |  76 ++++++++++++
 helpers/query_helper.py                       |   9 ++
 helpers/shoppertrak_sites.py                  | 110 +++++++++++++++++
 requirements.txt                              |   2 +-
 .../test_daily_location_visits_alarms.py      | 112 ++++++++++++++++++
 9 files changed, 320 insertions(+), 4 deletions(-)
 create mode 100644 alarms/models/daily_location_visits_alarms.py
 create mode 100644 helpers/shoppertrak_sites.py
 create mode 100644 tests/alarms/models/test_daily_location_visits_alarms.py

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 38961a2..06b0616 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -9,16 +9,16 @@ jobs:
     name: Updates changelog
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: dangoslen/changelog-enforcer@v3
   test:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python 3.12
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.12'
           cache: 'pip'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1960cd4..fd529f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 2024-12-17
+### Added
+- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data
+
 ## 2024-11-13
 ### Added
 - Add BranchCodesMap alarms checking that it's in sync with all branches with location hours
diff --git a/README.md b/README.md
index b6deb98..b217580 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
 * When there are fewer than 10000 new location visits records for the previous day
 * When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
 * When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
+* When the sites from the aggregated location visits don't perfectly match the known sites
+* When there are duplicate aggregated location visits sites
+* When fewer than 50% of sites had a healthy day of location visits
 * When the number of active itype/location/stat group codes in Sierra and Redshift differs
 * When there are duplicate active itype/location/stat group codes in Redshift
 * When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
diff --git a/alarm_controller.py b/alarm_controller.py
index bca137c..946e920 100644
--- a/alarm_controller.py
+++ b/alarm_controller.py
@@ -2,6 +2,7 @@
 
 from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
 from alarms.models.circ_trans_alarms import CircTransAlarms
+from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
 from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
 from alarms.models.holds_alarms import HoldsAlarms
 from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
@@ -64,6 +65,7 @@ def _setup_alarms(self):
             BranchCodesMapAlarms(self.redshift_client),
             CircTransAlarms(self.redshift_client, self.sierra_client),
             GranularLocationVisitsAlarms(self.redshift_client),
+            DailyLocationVisitsAlarms(self.redshift_client),
             HoldsAlarms(self.redshift_client),
             OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
             PatronInfoAlarms(self.redshift_client, self.sierra_client),
diff --git a/alarms/models/daily_location_visits_alarms.py b/alarms/models/daily_location_visits_alarms.py
new file mode 100644
index 0000000..2284f88
--- /dev/null
+++ b/alarms/models/daily_location_visits_alarms.py
@@ -0,0 +1,103 @@
+from alarms.alarm import Alarm
+from datetime import timedelta
+from helpers.query_helper import build_redshift_daily_location_visits_query
+from helpers.shoppertrak_sites import SHOPPERTRAK_SITES
+from nypl_py_utils.functions.log_helper import create_log
+
+
+class DailyLocationVisitsAlarms(Alarm):
+    """Alarms on the Redshift daily_location_visits table.
+
+    Logs an error when the ShopperTrak sites returned for the tested date are
+    duplicated, missing, or unknown relative to SHOPPERTRAK_SITES, or when fewer
+    than half of the returned rows are flagged as healthy.
+    """
+
+    def __init__(self, redshift_client):
+        super().__init__(redshift_client)
+        self.logger = create_log("daily_location_visits_alarms")
+
+    def run_checks(self):
+        """Query one date of daily location visits and run every site check."""
+        # The tested date is 29 days before yesterday_date.
+        # NOTE(review): presumably old enough for the day's data to be final --
+        # confirm against the job that populates the table.
+        date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
+        self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")
+        redshift_table = "daily_location_visits" + self.redshift_suffix
+        redshift_query = build_redshift_daily_location_visits_query(
+            redshift_table, date_to_test
+        )
+
+        self.redshift_client.connect()
+        try:
+            redshift_results = self.redshift_client.execute_query(redshift_query)
+        finally:
+            # Release the connection even when the query raises.
+            self.redshift_client.close_connection()
+
+        redshift_sites = []
+        redshift_healthy = []
+        for shoppertrak_site, is_all_healthy in redshift_results:
+            redshift_sites.append(shoppertrak_site)
+            redshift_healthy.append(int(is_all_healthy))
+
+        self.check_redshift_duplicate_sites_alarm(redshift_sites)
+        self.check_redshift_missing_sites_alarm(redshift_sites)
+        self.check_redshift_extra_sites_alarm(redshift_sites)
+        self.check_redshift_healthy_sites_alarm(redshift_healthy)
+
+    def check_redshift_duplicate_sites_alarm(self, redshift_sites):
+        """Log an error listing every site id that appears more than once."""
+        seen_sites = set()
+        duplicate_sites = set()
+        for site in redshift_sites:
+            if site in seen_sites:
+                duplicate_sites.add(site)
+            seen_sites.add(site)
+
+        if duplicate_sites:
+            self.logger.error(
+                "The following ShopperTrak sites are duplicated: {}".format(
+                    sorted(duplicate_sites)
+                )
+            )
+
+    def check_redshift_missing_sites_alarm(self, redshift_sites):
+        """Log an error listing known sites absent from redshift_sites."""
+        missing_sites = SHOPPERTRAK_SITES.difference(set(redshift_sites))
+        if missing_sites:
+            self.logger.error(
+                "The following ShopperTrak sites are missing: {}".format(
+                    sorted(missing_sites)
+                )
+            )
+
+    def check_redshift_extra_sites_alarm(self, redshift_sites):
+        """Log an error listing returned site ids not in SHOPPERTRAK_SITES."""
+        extra_sites = set(redshift_sites).difference(SHOPPERTRAK_SITES)
+        if extra_sites:
+            self.logger.error(
+                "The following unknown ShopperTrak site ids were found: {}".format(
+                    sorted(extra_sites)
+                )
+            )
+
+    def check_redshift_healthy_sites_alarm(self, redshift_healthy):
+        """Log an error when fewer than half of the health flags are set.
+
+        redshift_healthy holds one int(is_all_healthy) value per returned row.
+        """
+        if not redshift_healthy:
+            # No rows means no ratio to compute; dividing by zero here would
+            # abort the run. When called from run_checks, the missing-sites
+            # alarm has already reported the absent sites.
+            return
+
+        percent_healthy = sum(redshift_healthy) / len(redshift_healthy)
+        if percent_healthy < 0.5:
+            self.logger.error(
+                "Only {0:.2f}% of ShopperTrak sites were healthy".format(
+                    percent_healthy * 100
+                )
+            )
diff --git a/helpers/query_helper.py b/helpers/query_helper.py
index 5c2f59f..1ffaf7c 100755
--- a/helpers/query_helper.py
+++ b/helpers/query_helper.py
@@ -14,6 +14,11 @@
     "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
 )
 
+_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (  # site id + health flag per visits_date
+    "SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
+    "WHERE visits_date = '{date}';"
+)
+
 _REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
     "SELECT COUNT(id) FROM {table} "
     "WHERE increment_start::DATE = '{date}' AND is_fresh;"
@@ -192,6 +197,10 @@ def build_redshift_circ_trans_query(table, date_field, date):
     )
 
 
+def build_redshift_daily_location_visits_query(table, date):  # returns SQL text
+    return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(table=table, date=date)
+
+
 def build_redshift_location_visits_count_query(table, date):
     return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date)
 
diff --git a/helpers/shoppertrak_sites.py b/helpers/shoppertrak_sites.py
new file mode 100644
index 0000000..0134fde
--- /dev/null
+++ b/helpers/shoppertrak_sites.py
@@ -0,0 +1,110 @@
+# Known ShopperTrak site ids; DailyLocationVisitsAlarms compares the site ids
+# found in Redshift against this set.
+SHOPPERTRAK_SITES = {
+    "ag",
+    "al",
+    "ba",
+    "bc",
+    "be",
+    "bl",
+    "bl - teen_center; interior",
+    "br",
+    "bt",
+    "ca",
+    "ch",
+    "ci",
+    "cl",
+    "cn",
+    "cp",
+    "cs",
+    "ct",
+    "dh",
+    "dy",
+    "ea",
+    "ep",
+    "ew",
+    "fe",
+    "ft",
+    "fw",
+    "fx",
+    "gd",
+    "gd - teen_center; interior",
+    "gk",
+    "hb",
+    "hb - teen_center; interior",
+    "hd",
+    "hf",
+    "hg",
+    "hk",
+    "hl",
+    "hp",
+    "hs",
+    "ht",
+    "hu",
+    "in",
+    "jm",
+    "jp",
+    "kb",
+    "kp",
+    "lb",
+    "lm",
+    "ma - 40th_st_1",
+    "ma - 40th_st_2",
+    "ma - 42nd_st",
+    "ma - 5th_ave",
+    "ma - bartos; interior",
+    "ma - gottesman; interior",
+    "ma - retail; interior",
+    "ma - social_sciences_library; interior",
+    "ma - visitor_center; interior",
+    "ma - wachenheim; interior",
+    "mb",
+    "mb - teen_center; interior",
+    "me",
+    "mh",
+    "mh - teen_center; interior",
+    "ml",
+    "mn",
+    "mo",
+    "mp",
+    "mr",
+    "mu",
+    "my",
+    "nb",
+    "nd",
+    "ns",
+    "ot",
+    "pk",
+    "pm",
+    "pr",
+    "rd",
+    "ri",
+    "rs",
+    "rt",
+    "sa",
+    "sb",
+    "sc",
+    "sd",
+    "se",
+    "sg",
+    "sg - teen_center; interior",
+    "sn",
+    "ss",
+    "st",
+    "sv",
+    "tg",
+    "th",
+    "tm",
+    "ts",
+    "tv",
+    "vc",
+    "vn",
+    "wb",
+    "wf",
+    "wh",
+    "wk",
+    "wl",
+    "wo",
+    "wt",
+    "yv",
+}
diff --git a/requirements.txt b/requirements.txt
index 6a389a9..1351339 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.4.0
+nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.6.2
 selenium>=4.10.0
\ No newline at end of file
diff --git a/tests/alarms/models/test_daily_location_visits_alarms.py b/tests/alarms/models/test_daily_location_visits_alarms.py
new file mode 100644
index 0000000..5a27c46
--- /dev/null
+++ b/tests/alarms/models/test_daily_location_visits_alarms.py
@@ -0,0 +1,110 @@
+import logging
+import pytest
+
+from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
+from datetime import date
+
+# Dotted path of the query builder as imported by the module under test.
+_QUERY_BUILDER = (
+    "alarms.models.daily_location_visits_alarms."
+    "build_redshift_daily_location_visits_query"
+)
+
+
+class TestDailyLocationVisitsAlarms:
+    """Unit tests for DailyLocationVisitsAlarms with a mocked Redshift client."""
+
+    @pytest.fixture
+    def test_instance(self, mocker, monkeypatch):
+        """Build the alarms object against a three-site SHOPPERTRAK_SITES."""
+        monkeypatch.setattr(
+            "alarms.models.daily_location_visits_alarms.SHOPPERTRAK_SITES",
+            {"aa", "bb", "cc"},
+        )
+        return DailyLocationVisitsAlarms(mocker.MagicMock())
+
+    def test_init(self, mocker):
+        """Inherited attributes carry the values expected in the test setup."""
+        alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
+        assert alarms.redshift_suffix == "_test_redshift_db"
+        assert alarms.run_added_tests
+        assert alarms.yesterday_date == date(2023, 5, 31)
+        assert alarms.yesterday == "2023-05-31"
+
+    def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
+        """Known sites, no duplicates and two of three healthy log no error."""
+        query_builder = mocker.patch(_QUERY_BUILDER, return_value="redshift query")
+        client = test_instance.redshift_client
+        client.execute_query.return_value = (
+            ["aa", True],
+            ["bb", True],
+            ["cc", False],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert caplog.text == ""
+
+        client.connect.assert_called_once()
+        query_builder.assert_called_once_with(
+            "daily_location_visits_test_redshift_db", "2023-05-02"
+        )
+        client.execute_query.assert_called_once_with("redshift query")
+        client.close_connection.assert_called_once()
+
+    def test_run_checks_redshift_duplicate_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        """A site id returned twice is reported as duplicated."""
+        mocker.patch(_QUERY_BUILDER)
+        rows = (["aa", True], ["bb", True], ["bb", True], ["cc", False])
+        test_instance.redshift_client.execute_query.return_value = rows
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        expected = "The following ShopperTrak sites are duplicated: ['bb']"
+        assert expected in caplog.text
+
+    def test_run_checks_redshift_missing_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        """A known site absent from the results is reported as missing."""
+        mocker.patch(_QUERY_BUILDER)
+        rows = (["aa", True], ["cc", True])
+        test_instance.redshift_client.execute_query.return_value = rows
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        expected = "The following ShopperTrak sites are missing: ['bb']"
+        assert expected in caplog.text
+
+    def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
+        """Site ids outside SHOPPERTRAK_SITES are reported in sorted order."""
+        mocker.patch(_QUERY_BUILDER)
+        rows = (
+            ["aa", True],
+            ["bb", True],
+            ["cc", False],
+            ["ee", True],
+            ["dd", False],
+        )
+        test_instance.redshift_client.execute_query.return_value = rows
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        expected = (
+            "The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
+        )
+        assert expected in caplog.text
+
+    def test_run_checks_redshift_healthy_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        """One healthy site out of three falls below the 50% threshold."""
+        mocker.patch(_QUERY_BUILDER)
+        rows = (["aa", True], ["bb", False], ["cc", False])
+        test_instance.redshift_client.execute_query.return_value = rows
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert "Only 33.33% of ShopperTrak sites were healthy" in caplog.text