From d98619a480e9800637b861fd0ac84cab8b1184b4 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Dec 2024 13:39:13 -0500 Subject: [PATCH] Add DailyLocationVisitsAlarms --- .github/workflows/run-tests.yml | 6 +- CHANGELOG.md | 4 + README.md | 3 + alarm_controller.py | 2 + alarms/models/daily_location_visits_alarms.py | 76 ++++++++++++ helpers/query_helper.py | 9 ++ helpers/shoppertrak_sites.py | 110 +++++++++++++++++ requirements.txt | 2 +- .../test_daily_location_visits_alarms.py | 112 ++++++++++++++++++ 9 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 alarms/models/daily_location_visits_alarms.py create mode 100644 helpers/shoppertrak_sites.py create mode 100644 tests/alarms/models/test_daily_location_visits_alarms.py diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 38961a2..06b0616 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -9,16 +9,16 @@ jobs: name: Updates changelog runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: dangoslen/changelog-enforcer@v3 test: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.12 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.12' cache: 'pip' diff --git a/CHANGELOG.md b/CHANGELOG.md index 1960cd4..fd529f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2024-12-17 +### Added +- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data + ## 2024-11-13 ### Added - Add BranchCodesMap alarms checking that it's in sync with all branches with location hours diff --git a/README.md b/README.md index b6deb98..b217580 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo * 
When there are fewer than 10000 new location visits records for the previous day * When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows * When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows +* When the sites from the aggregated location visits don't perfectly match the known sites +* When there are duplicate aggregated location visits sites +* When less than 50% of sites had a healthy day of location visits * When the number of active itype/location/stat group codes in Sierra and Redshift differs * When there are duplicate active itype/location/stat group codes in Redshift * When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated diff --git a/alarm_controller.py b/alarm_controller.py index bca137c..946e920 100644 --- a/alarm_controller.py +++ b/alarm_controller.py @@ -2,6 +2,7 @@ from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms from alarms.models.circ_trans_alarms import CircTransAlarms +from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms from alarms.models.holds_alarms import HoldsAlarms from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms @@ -64,6 +65,7 @@ def _setup_alarms(self): BranchCodesMapAlarms(self.redshift_client), CircTransAlarms(self.redshift_client, self.sierra_client), GranularLocationVisitsAlarms(self.redshift_client), + DailyLocationVisitsAlarms(self.redshift_client), HoldsAlarms(self.redshift_client), OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials), PatronInfoAlarms(self.redshift_client, self.sierra_client), diff --git a/alarms/models/daily_location_visits_alarms.py b/alarms/models/daily_location_visits_alarms.py new file mode 100644 index 
class DailyLocationVisitsAlarms(Alarm):
    """Alarms for the Redshift daily_location_visits table.

    For a single visits date, an error is logged when the ShopperTrak sites
    in the table are duplicated, when they do not exactly match
    SHOPPERTRAK_SITES, or when fewer than half of them are flagged as
    healthy.
    """

    # Minimum fraction of sites that must be healthy to avoid an alarm
    MIN_HEALTHY_FRACTION = 0.5

    def __init__(self, redshift_client):
        """Store the Redshift client via the base class and set up a logger."""
        super().__init__(redshift_client)
        self.logger = create_log("daily_location_visits_alarms")

    def run_checks(self):
        """Query one day of daily location visits and run every check on it."""
        # The date checked is 29 days before yesterday, not yesterday itself.
        # NOTE(review): presumably this leaves time for late or recovered
        # data to be loaded before it is judged -- confirm.
        date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
        self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")
        redshift_table = "daily_location_visits" + self.redshift_suffix
        redshift_query = build_redshift_daily_location_visits_query(
            redshift_table, date_to_test
        )

        self.redshift_client.connect()
        redshift_results = self.redshift_client.execute_query(redshift_query)
        self.redshift_client.close_connection()

        # Each row is a (shoppertrak_site_id, is_all_healthy) pair
        redshift_sites = []
        redshift_healthy = []
        for shoppertrak_site, is_all_healthy in redshift_results:
            redshift_sites.append(shoppertrak_site)
            redshift_healthy.append(int(is_all_healthy))

        self.check_redshift_duplicate_sites_alarm(redshift_sites)
        self.check_redshift_missing_sites_alarm(redshift_sites)
        self.check_redshift_extra_sites_alarm(redshift_sites)
        self.check_redshift_healthy_sites_alarm(redshift_healthy)

    def check_redshift_duplicate_sites_alarm(self, redshift_sites):
        """Log an error listing every site id that appears more than once."""
        seen_sites = set()
        duplicate_sites = set()
        for site in redshift_sites:
            if site in seen_sites:
                duplicate_sites.add(site)
            seen_sites.add(site)

        if duplicate_sites:
            self.logger.error(
                "The following ShopperTrak sites are duplicated: "
                f"{sorted(duplicate_sites)}"
            )

    def check_redshift_missing_sites_alarm(self, redshift_sites):
        """Log an error listing known sites that are absent from Redshift."""
        missing_sites = SHOPPERTRAK_SITES.difference(redshift_sites)
        if missing_sites:
            self.logger.error(
                "The following ShopperTrak sites are missing: "
                f"{sorted(missing_sites)}"
            )

    def check_redshift_extra_sites_alarm(self, redshift_sites):
        """Log an error listing Redshift sites that are not known sites."""
        extra_sites = set(redshift_sites).difference(SHOPPERTRAK_SITES)
        if extra_sites:
            self.logger.error(
                "The following unknown ShopperTrak site ids were found: "
                f"{sorted(extra_sites)}"
            )

    def check_redshift_healthy_sites_alarm(self, redshift_healthy):
        """Log an error when too small a share of sites is healthy.

        redshift_healthy holds one 0/1 flag per row returned by the query.
        """
        # With no rows there is no percentage to compute; dividing by
        # len([]) would raise ZeroDivisionError instead of reporting anything.
        if not redshift_healthy:
            self.logger.error(
                "No ShopperTrak sites were found, so site health could not "
                "be checked"
            )
            return

        healthy_fraction = sum(redshift_healthy) / len(redshift_healthy)
        if healthy_fraction < self.MIN_HEALTHY_FRACTION:
            percent_healthy = healthy_fraction * 100
            self.logger.error(
                f"Only {percent_healthy:.2f}% of ShopperTrak sites were healthy"
            )
class TestDailyLocationVisitsAlarms:
    """Tests for DailyLocationVisitsAlarms run against a mocked Redshift client."""

    # Dotted path of the query builder as imported by the module under test
    _QUERY_BUILDER_PATH = (
        "alarms.models.daily_location_visits_alarms"
        ".build_redshift_daily_location_visits_query"
    )

    @staticmethod
    def _run_and_capture(alarms, caplog, rows):
        """Feed rows to the mocked client, run the checks, return ERROR logs."""
        alarms.redshift_client.execute_query.return_value = rows
        with caplog.at_level(logging.ERROR):
            alarms.run_checks()
        return caplog.text

    @pytest.fixture
    def test_instance(self, mocker, monkeypatch):
        """Build an alarms object whose known sites are aa, bb and cc."""
        known_sites = {"aa", "bb", "cc"}
        monkeypatch.setattr(
            "alarms.models.daily_location_visits_alarms.SHOPPERTRAK_SITES",
            known_sites,
        )
        return DailyLocationVisitsAlarms(mocker.MagicMock())

    def test_init(self, mocker):
        """Check the attributes set up when the alarms object is constructed."""
        # NOTE(review): the expected suffix and dates presumably come from
        # shared test configuration that is not shown here -- confirm.
        alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
        assert alarms.redshift_suffix == "_test_redshift_db"
        assert alarms.run_added_tests
        assert alarms.yesterday_date == date(2023, 5, 31)
        assert alarms.yesterday == "2023-05-31"

    def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
        """Matching sites with a healthy majority must log no errors."""
        query_builder = mocker.patch(
            self._QUERY_BUILDER_PATH, return_value="redshift query"
        )
        rows = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
        )

        logged = self._run_and_capture(test_instance, caplog, rows)
        assert logged == ""

        client = test_instance.redshift_client
        client.connect.assert_called_once()
        query_builder.assert_called_once_with(
            "daily_location_visits_test_redshift_db", "2023-05-02"
        )
        client.execute_query.assert_called_once_with("redshift query")
        client.close_connection.assert_called_once()

    def test_run_checks_redshift_duplicate_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A site id returned twice must be reported as duplicated."""
        mocker.patch(self._QUERY_BUILDER_PATH)
        rows = (
            ["aa", True],
            ["bb", True],
            ["bb", True],
            ["cc", False],
        )

        logged = self._run_and_capture(test_instance, caplog, rows)
        expected = "The following ShopperTrak sites are duplicated: ['bb']"
        assert expected in logged

    def test_run_checks_redshift_missing_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A known site absent from the results must be reported as missing."""
        mocker.patch(self._QUERY_BUILDER_PATH)
        rows = (
            ["aa", True],
            ["cc", True],
        )

        logged = self._run_and_capture(test_instance, caplog, rows)
        expected = "The following ShopperTrak sites are missing: ['bb']"
        assert expected in logged

    def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
        """Unknown site ids must be reported together, in sorted order."""
        mocker.patch(self._QUERY_BUILDER_PATH)
        rows = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
            ["ee", True],
            ["dd", False],
        )

        logged = self._run_and_capture(test_instance, caplog, rows)
        expected = (
            "The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
        )
        assert expected in logged

    def test_run_checks_redshift_healthy_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """One healthy site out of three must trigger the health alarm."""
        mocker.patch(self._QUERY_BUILDER_PATH)
        rows = (
            ["aa", True],
            ["bb", False],
            ["cc", False],
        )

        logged = self._run_and_capture(test_instance, caplog, rows)
        assert "Only 33.33% of ShopperTrak sites were healthy" in logged