Add DailyLocationVisitsAlarms
aaronfriedman6 committed Dec 17, 2024
1 parent bde6756 commit d98619a
Showing 9 changed files with 320 additions and 4 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/run-tests.yml
@@ -9,16 +9,16 @@ jobs:
name: Updates changelog
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: dangoslen/changelog-enforcer@v3
test:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python 3.12
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
## 2024-12-17
### Added
- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data

## 2024-11-13
### Added
- Add BranchCodesMap alarms checking that it's in sync with all branches with location hours
3 changes: 3 additions & 0 deletions README.md
@@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
* When there are fewer than 10000 new location visits records for the previous day
* When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
* When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
* When the sites from the aggregated location visits don't perfectly match the known sites
* When there are duplicate aggregated location visits sites
* When less than 50% of sites had a healthy day of location visits
* When the number of active itype/location/stat group codes in Sierra and Redshift differs
* When there are duplicate active itype/location/stat group codes in Redshift
* When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
2 changes: 2 additions & 0 deletions alarm_controller.py
@@ -2,6 +2,7 @@

from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
from alarms.models.circ_trans_alarms import CircTransAlarms
from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
from alarms.models.holds_alarms import HoldsAlarms
from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
@@ -64,6 +65,7 @@ def _setup_alarms(self):
BranchCodesMapAlarms(self.redshift_client),
CircTransAlarms(self.redshift_client, self.sierra_client),
GranularLocationVisitsAlarms(self.redshift_client),
DailyLocationVisitsAlarms(self.redshift_client),
HoldsAlarms(self.redshift_client),
OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
PatronInfoAlarms(self.redshift_client, self.sierra_client),
76 changes: 76 additions & 0 deletions alarms/models/daily_location_visits_alarms.py
@@ -0,0 +1,76 @@
from alarms.alarm import Alarm
from datetime import timedelta
from helpers.query_helper import build_redshift_daily_location_visits_query
from helpers.shoppertrak_sites import SHOPPERTRAK_SITES
from nypl_py_utils.functions.log_helper import create_log


class DailyLocationVisitsAlarms(Alarm):
def __init__(self, redshift_client):
super().__init__(redshift_client)
self.logger = create_log("daily_location_visits_alarms")

def run_checks(self):
date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")
redshift_table = "daily_location_visits" + self.redshift_suffix
redshift_query = build_redshift_daily_location_visits_query(
redshift_table, date_to_test
)

self.redshift_client.connect()
redshift_results = self.redshift_client.execute_query(redshift_query)
self.redshift_client.close_connection()

redshift_sites = []
redshift_healthy = []
for shoppertrak_site, is_all_healthy in redshift_results:
redshift_sites.append(shoppertrak_site)
redshift_healthy.append(int(is_all_healthy))

self.check_redshift_duplicate_sites_alarm(redshift_sites)
self.check_redshift_missing_sites_alarm(redshift_sites)
self.check_redshift_extra_sites_alarm(redshift_sites)
self.check_redshift_healthy_sites_alarm(redshift_healthy)

def check_redshift_duplicate_sites_alarm(self, redshift_sites):
seen_sites = set()
duplicate_sites = set()
for site in redshift_sites:
if site in seen_sites:
duplicate_sites.add(site)
seen_sites.add(site)

if duplicate_sites:
self.logger.error(
"The following ShopperTrak sites are duplicated: {}".format(
sorted(list(duplicate_sites))
)
)

def check_redshift_missing_sites_alarm(self, redshift_sites):
missing_sites = SHOPPERTRAK_SITES.difference(set(redshift_sites))
if missing_sites:
self.logger.error(
"The following ShopperTrak sites are missing: {}".format(
sorted(list(missing_sites))
)
)

def check_redshift_extra_sites_alarm(self, redshift_sites):
extra_sites = set(redshift_sites).difference(SHOPPERTRAK_SITES)
if extra_sites:
self.logger.error(
"The following unknown ShopperTrak site ids were found: {}".format(
sorted(list(extra_sites))
)
)

def check_redshift_healthy_sites_alarm(self, redshift_healthy):
percent_healthy = sum(redshift_healthy) / len(redshift_healthy)
if percent_healthy < 0.5:
self.logger.error(
"Only {0:.2f}% of ShopperTrak sites were healthy".format(
percent_healthy * 100
)
)
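
For readers skimming the diff, here is a minimal standalone sketch of the three site checks above (not part of the commit; the three-site universe and the sample rows are hypothetical):

```python
# Hypothetical known-site universe and sample Redshift rows, for illustration only:
# "bb" is duplicated, "cc" is missing, and "dd" is an unknown site.
KNOWN_SITES = {"aa", "bb", "cc"}
redshift_sites = ["aa", "bb", "bb", "dd"]

seen_sites, duplicate_sites = set(), set()
for site in redshift_sites:
    if site in seen_sites:
        duplicate_sites.add(site)
    seen_sites.add(site)

missing_sites = KNOWN_SITES.difference(set(redshift_sites))  # {'cc'}
extra_sites = set(redshift_sites).difference(KNOWN_SITES)    # {'dd'}
print(sorted(duplicate_sites), sorted(missing_sites), sorted(extra_sites))
# ['bb'] ['cc'] ['dd']
```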
9 changes: 9 additions & 0 deletions helpers/query_helper.py
@@ -14,6 +14,11 @@
"SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
)

_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (
"SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
"WHERE visits_date = '{date}';"
)

_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
"SELECT COUNT(id) FROM {table} "
"WHERE increment_start::DATE = '{date}' AND is_fresh;"
@@ -192,6 +197,10 @@ def build_redshift_circ_trans_query(table, date_field, date):
)


def build_redshift_daily_location_visits_query(table, date):
return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(table=table, date=date)


def build_redshift_location_visits_count_query(table, date):
return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date)
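
As a quick illustration of the new builder (the table name and date below are hypothetical), build_redshift_daily_location_visits_query simply interpolates both values into the SELECT template:

```python
from helpers.query_helper import build_redshift_daily_location_visits_query

# Hypothetical table name and date, for illustration only
print(build_redshift_daily_location_visits_query("daily_location_visits_production", "2024-12-16"))
# SELECT shoppertrak_site_id, is_all_healthy FROM daily_location_visits_production WHERE visits_date = '2024-12-16';
```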

110 changes: 110 additions & 0 deletions helpers/shoppertrak_sites.py
@@ -0,0 +1,110 @@
SHOPPERTRAK_SITES = set(
[
"ag",
"al",
"ba",
"bc",
"be",
"bl",
"bl - teen_center; interior",
"br",
"bt",
"ca",
"ch",
"ci",
"cl",
"cn",
"cp",
"cs",
"ct",
"dh",
"dy",
"ea",
"ep",
"ew",
"fe",
"ft",
"fw",
"fx",
"gd",
"gd - teen_center; interior",
"gk",
"hb",
"hb - teen_center; interior",
"hd",
"hf",
"hg",
"hk",
"hl",
"hp",
"hs",
"ht",
"hu",
"in",
"jm",
"jp",
"kb",
"kp",
"lb",
"lm",
"ma - 40th_st_1",
"ma - 40th_st_2",
"ma - 42nd_st",
"ma - 5th_ave",
"ma - bartos; interior",
"ma - gottesman; interior",
"ma - retail; interior",
"ma - social_sciences_library; interior",
"ma - visitor_center; interior",
"ma - wachenheim; interior",
"mb",
"mb - teen_center; interior",
"me",
"mh",
"mh - teen_center; interior",
"ml",
"mn",
"mo",
"mp",
"mr",
"mu",
"my",
"nb",
"nd",
"ns",
"ot",
"pk",
"pm",
"pr",
"rd",
"ri",
"rs",
"rt",
"sa",
"sb",
"sc",
"sd",
"se",
"sg",
"sg - teen_center; interior",
"sn",
"ss",
"st",
"sv",
"tg",
"th",
"tm",
"ts",
"tv",
"vc",
"vn",
"wb",
"wf",
"wh",
"wk",
"wl",
"wo",
"wt",
"yv",
]
)
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,2 +1,2 @@
nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.4.0
nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.6.2
selenium>=4.10.0
112 changes: 112 additions & 0 deletions tests/alarms/models/test_daily_location_visits_alarms.py
@@ -0,0 +1,112 @@
import logging
import pytest

from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from datetime import date


class TestDailyLocationVisitsAlarms:
@pytest.fixture
def test_instance(self, mocker, monkeypatch):
monkeypatch.setattr(
"alarms.models.daily_location_visits_alarms.SHOPPERTRAK_SITES",
set(["aa", "bb", "cc"]),
)
return DailyLocationVisitsAlarms(mocker.MagicMock())

def test_init(self, mocker):
location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
assert location_visits_alarms.redshift_suffix == "_test_redshift_db"
assert location_visits_alarms.run_added_tests
assert location_visits_alarms.yesterday_date == date(2023, 5, 31)
assert location_visits_alarms.yesterday == "2023-05-31"

def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
mock_redshift_query = mocker.patch(
"alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query",
return_value="redshift query",
)
test_instance.redshift_client.execute_query.return_value = (
["aa", True],
["bb", True],
["cc", False],
)

with caplog.at_level(logging.ERROR):
test_instance.run_checks()
assert caplog.text == ""

test_instance.redshift_client.connect.assert_called_once()
mock_redshift_query.assert_called_once_with(
"daily_location_visits_test_redshift_db", "2023-05-02"
)
test_instance.redshift_client.execute_query.assert_called_once_with(
"redshift query"
)
test_instance.redshift_client.close_connection.assert_called_once()

def test_run_checks_redshift_duplicate_sites_alarm(
self, test_instance, mocker, caplog
):
mocker.patch(
"alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
)
test_instance.redshift_client.execute_query.return_value = (
["aa", True],
["bb", True],
["bb", True],
["cc", False],
)

with caplog.at_level(logging.ERROR):
test_instance.run_checks()
assert ("The following ShopperTrak sites are duplicated: ['bb']") in caplog.text

def test_run_checks_redshift_missing_sites_alarm(
self, test_instance, mocker, caplog
):
mocker.patch(
"alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
)
test_instance.redshift_client.execute_query.return_value = (
["aa", True],
["cc", True],
)

with caplog.at_level(logging.ERROR):
test_instance.run_checks()
assert "The following ShopperTrak sites are missing: ['bb']" in caplog.text

def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
mocker.patch(
"alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
)
test_instance.redshift_client.execute_query.return_value = (
["aa", True],
["bb", True],
["cc", False],
["ee", True],
["dd", False],
)

with caplog.at_level(logging.ERROR):
test_instance.run_checks()
assert (
"The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
) in caplog.text

def test_run_checks_redshift_healthy_sites_alarm(
self, test_instance, mocker, caplog
):
mocker.patch(
"alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
)
test_instance.redshift_client.execute_query.return_value = (
["aa", True],
["bb", False],
["cc", False],
)

with caplog.at_level(logging.ERROR):
test_instance.run_checks()
assert "Only 33.33% of ShopperTrak sites were healthy" in caplog.text
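
A short sketch (not part of the commit) of where the expected values in these tests come from, assuming the shared test setup pins yesterday_date to 2023-05-31 as the init test asserts:

```python
from datetime import date, timedelta

yesterday_date = date(2023, 5, 31)  # pinned by the shared test setup, per test_init
print((yesterday_date - timedelta(days=29)).isoformat())
# 2023-05-02 -- the date passed to the query builder

healthy = [int(h) for h in (True, False, False)]  # rows from the healthy-sites test
print("Only {0:.2f}% of ShopperTrak sites were healthy".format(sum(healthy) / len(healthy) * 100))
# Only 33.33% of ShopperTrak sites were healthy
```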
