From 76385bc49e2cdcc9b5d6becf8e0325de7f299df6 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Tue, 26 Nov 2024 13:30:49 +0200 Subject: [PATCH 01/17] create report --- corehq/apps/hqadmin/reports.py | 64 ++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index 518e70c57ec3..3b2bff82928f 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -1,3 +1,5 @@ +from datetime import datetime, timedelta + from django.urls import reverse from django.utils.functional import cached_property from django.utils.html import format_html @@ -24,6 +26,7 @@ from corehq.const import SERVER_DATETIME_FORMAT from corehq.apps.hqadmin.models import HqDeploy from corehq.apps.es.cases import CaseES +from corehq.apps.es.case_search import CaseSearchES from corehq.apps.es.forms import FormES from corehq.toggles import USER_CONFIGURABLE_REPORTS, RESTRICT_DATA_SOURCE_REBUILD from corehq.motech.repeaters.const import UCRRestrictionFFStatus @@ -495,3 +498,64 @@ def headers(self): @property def rows(self): return self.table_data.rows + + +class StaleCasesTable: + + STALE_DATE_THRESHOLD_DAYS = 365 + + @property + def headers(self): + return DataTablesHeader( + DataTablesColumn(gettext_lazy("Domain")), + DataTablesColumn(gettext_lazy("Case count")) + ) + + @property + def rows(self): + rows = [] + case_count_by_domain = self._stale_case_count() + for bucket in case_count_by_domain.values(): + rows.append([bucket.key, bucket.doc_count]) + return rows + + def _stale_case_count(self): + return ( + CaseSearchES() + .is_closed(False) + .server_modified_range(lt=self._get_stale_date()) + .size(0) + .aggregation( + TermsAggregation('domain', 'domain.exact') + ) + ).run().aggregations.domain.buckets_dict + + def _get_stale_date(self): + current_date = datetime.now() + stale_threshold_date = current_date - timedelta(days=self.STALE_DATE_THRESHOLD_DAYS) + return stale_threshold_date + + @staticmethod + def format_as_table(row_data, data_tables_header): + """ + Formats a given set of `row_data` with `headers` into a str formatted table that looks as follows: + + ``` + Header_1 | Header_2 | etc... + --------------------------------- + Alice | 25 | New York + Bob | 30 | Los Angeles + Charlie | 35 | Chicago + ``` + """ + # Calculate width of each col + headers = [str(header.html) for header in data_tables_header] + col_widths = [max(len(str(row[i])) for row in [headers] + row_data) for i in range(len(headers))] + row_format = " | ".join(f"{{:<{w}}}" for w in col_widths) + + lines = [] + lines.append(row_format.format(*headers)) + lines.append("-" * (sum(col_widths) + 3 * (len(headers) - 1))) # Divider + for row in row_data: + lines.append(row_format.format(*row)) + return '\n'.join(lines) From c74277303fefae5a1100dc10dde62676bf6957b4 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Tue, 26 Nov 2024 14:18:05 +0200 Subject: [PATCH 02/17] create periodic task --- corehq/apps/hqwebapp/tasks.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index 22ed5c8c84ec..45bac6d08321 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -305,3 +305,34 @@ def send_domain_ucr_data_info_to_admins(): send_mail_async.delay( subject, message, [settings.SOLUTIONS_AES_EMAIL] ) + + +@periodic_task(run_every=crontab(minute=0, hour=1, day_of_month=1)) +def send_stale_case_data_info_to_admins(): + from corehq.apps.hqadmin.reports import StaleCasesTable + + if not settings.SOLUTIONS_AES_EMAIL: + return + + table = StaleCasesTable() + row_data = table.rows + num_domains = len(row_data) + subject = ( + f'Monthly report: {num_domains} domains containing stale ' + f'case data (older than {table.STALE_DATE_THRESHOLD_DAYS} days)' + ) + if num_domains: + message = ( + f'We have identified {num_domains} domains containing stale ' + f'case data older than {table.STALE_DATE_THRESHOLD_DAYS} days.\n' + 'Please see detailed report below:\n' + f'{table.format_as_table(row_data, table.headers)}' + ) + else: + message = ( + 'No domains were found containing case data older than ' + f'{table.STALE_DATE_THRESHOLD_DAYS} days.' + ) + send_mail_async.delay( + subject, message, [settings.SOLUTIONS_AES_EMAIL] + ) From eea21c5ff0ef7b35eb9cc06309af3563134089af Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Tue, 26 Nov 2024 14:19:16 +0200 Subject: [PATCH 03/17] unit tests --- corehq/apps/hqadmin/tests/test_reports.py | 53 ++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/corehq/apps/hqadmin/tests/test_reports.py b/corehq/apps/hqadmin/tests/test_reports.py index 6302ba05c179..625d600faafd 100644 --- a/corehq/apps/hqadmin/tests/test_reports.py +++ b/corehq/apps/hqadmin/tests/test_reports.py @@ -1,7 +1,13 @@ +from datetime import datetime, timedelta +from uuid import uuid4 + from django.test import TestCase from unittest.mock import patch -from corehq.apps.hqadmin.reports import UCRRebuildRestrictionTable +from corehq.apps.es.case_search import case_search_adapter +from corehq.apps.es.tests.utils import es_test +from corehq.apps.hqadmin.reports import UCRRebuildRestrictionTable, StaleCasesTable +from corehq.form_processor.models import CommCareCase from corehq.motech.repeaters.const import UCRRestrictionFFStatus @@ -76,3 +82,48 @@ def test_should_show_domain_show_should_disable_ff_domains(self, restriction_ff_ self.assertFalse(table_data.should_show_domain( domain='domain', total_cases=100_000_000, total_forms=0) ) + + +@es_test(requires=[case_search_adapter], setup_class=True) +class TestStaleCasesTable(TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cases = [ + cls._get_case(days_back=0), + cls._get_case(days_back=366), + cls._get_case(days_back=380, is_closed=True), + cls._get_case(days_back=365), + ] + case_search_adapter.bulk_index(cases, refresh=True) + cls.table = StaleCasesTable() + + @classmethod + def _get_case(cls, days_back, is_closed=False): + server_modified_on = datetime.now() - timedelta(days=days_back) + return CommCareCase( + case_id=uuid4().hex, + domain='test', + server_modified_on=server_modified_on, + closed=is_closed + ) + + def test_stale_case_count(self): + res = self.table._stale_case_count() + self.assertEqual(len(res), 1) + self.assertEqual( + (res['test'].key, res['test'].doc_count), + ('test', 2) + ) + + def test_format_as_table(self): + expected_output = ( + 'Domain | Case count\n' + '-------------------\n' + 'test | 2 ' + ) + self.assertEqual( + self.table.format_as_table(self.table.rows, self.table.headers), + expected_output + ) From 305d37ed7025950707c20517dd74c979560ac4eb Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Tue, 26 Nov 2024 14:19:46 +0200 Subject: [PATCH 04/17] nit: set size for aggregated query --- corehq/apps/hqadmin/reports.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index 3b2bff82928f..fb076a88e558 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -405,13 +405,13 @@ def should_show_domain(self, domain, total_cases, total_forms): @staticmethod def _case_count_by_domain(domains): - return CaseES().domain(domains).aggregation( + return CaseES().domain(domains).size(0).aggregation( TermsAggregation('domain', 'domain.exact') ).run().aggregations.domain.buckets_dict @staticmethod def _forms_count_by_domain(domains): - return FormES().domain(domains).aggregation( + return FormES().domain(domains).size(0).aggregation( TermsAggregation('domain', 'domain.exact') ).run().aggregations.domain.buckets_dict From 96443c6eb3cfecba47d8f6f6e9c2e82047c2f582 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 10:57:12 +0200 Subject: [PATCH 05/17] aggregate query data with date chunks --- corehq/apps/hqadmin/reports.py | 77 ++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index fb076a88e558..b36fee0534a2 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -10,9 +10,12 @@ from dateutil.parser import parse from memoized import memoized +from dimagi.utils.logging import notify_exception + from phonelog.models import DeviceReportEntry from phonelog.reports import BaseDeviceLogReport +from corehq.apps.accounting.models import Subscription, SoftwarePlanEdition from corehq.apps.auditcare.utils.export import navigation_events_by_user from corehq.apps.reports.datatables import DataTablesColumn, DataTablesHeader from corehq.apps.reports.dispatcher import AdminReportDispatcher @@ -31,6 +34,7 @@ from corehq.toggles import USER_CONFIGURABLE_REPORTS, RESTRICT_DATA_SOURCE_REBUILD from corehq.motech.repeaters.const import UCRRestrictionFFStatus from corehq.apps.es.aggregations import TermsAggregation +from corehq.apps.es.exceptions import ESError class AdminReport(GenericTabularReport): @@ -501,9 +505,12 @@ def rows(self): class StaleCasesTable: - + STOP_YEAR = datetime(2005, 1, 1) + AGG_DATE_RANGE = 150 STALE_DATE_THRESHOLD_DAYS = 365 + BACKOFF_AMOUNT = 30 + MAX_BACKOFF_COUNT = 2 @property def headers(self): return DataTablesHeader( @@ -514,26 +521,70 @@ def headers(self): @property def rows(self): rows = [] - case_count_by_domain = self._stale_case_count() - for bucket in case_count_by_domain.values(): - rows.append([bucket.key, bucket.doc_count]) + case_count_by_domain = self._aggregate_case_count_data() + for domain, case_count in case_count_by_domain.items(): + rows.append([domain, case_count]) return rows - def _stale_case_count(self): + def _aggregate_case_count_data(self): + end_date = datetime.now() - timedelta(days=self.STALE_DATE_THRESHOLD_DAYS) + agg_res = {} + curr_backoff_count = 0 + curr_agg_date_range = self.AGG_DATE_RANGE + domains = self._get_domains() + while (True): + start_date = end_date - timedelta(days=self.AGG_DATE_RANGE) + try: + query_res = self._stale_case_count_in_date_range(domains, start_date, end_date) + except ESError as e: + curr_backoff_count += 1 + if curr_backoff_count <= self.MAX_BACKOFF_COUNT: + curr_agg_date_range -= self.AGG_DATE_RANGE + curr_backoff_count += 1 + else: + notify_exception( + None, + 'ES query timed out while compiling stale case report email.', + details={ + 'error': str(e), + 'start_date': start_date.strftime("%Y-%m-%d"), + 'end_date': end_date.strftime("%Y-%m-%d") + } + ) + raise ESError() + curr_backoff_count = 0 + curr_agg_date_range = self.AGG_DATE_RANGE + self._merge_agg_data(agg_res, query_res) + end_date = start_date + if end_date <= self.STOP_YEAR: + break + return agg_res + + def _merge_agg_data(self, agg_res, query_res): + for domain, case_count in query_res.items(): + if domain not in agg_res: + agg_res[domain] = 0 + agg_res[domain] += case_count + + def _stale_case_count_in_date_range(self, domains, start_date, end_date): return ( CaseSearchES() + .domain(domains) + .modified_range(gt=start_date, lt=end_date) .is_closed(False) - .server_modified_range(lt=self._get_stale_date()) - .size(0) .aggregation( TermsAggregation('domain', 'domain.exact') ) - ).run().aggregations.domain.buckets_dict - - def _get_stale_date(self): - current_date = datetime.now() - stale_threshold_date = current_date - timedelta(days=self.STALE_DATE_THRESHOLD_DAYS) - return stale_threshold_date + .size(0) + ).run().aggregations.domain.counts_by_bucket() + + def _get_domains(self): + return list(set( + Subscription.visible_objects + .exclude(plan_version__plan__edition=SoftwarePlanEdition.COMMUNITY) + .filter(is_active=True) + .values_list('subscriber__domain', flat=True) + )) @staticmethod def format_as_table(row_data, data_tables_header): From a307084c3f35e2283ab22dc65654ebc3c873b55c Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 11:05:39 +0200 Subject: [PATCH 06/17] exception handling in task --- corehq/apps/hqwebapp/tasks.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index 45bac6d08321..b1949f579b66 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -20,6 +20,7 @@ from dimagi.utils.web import get_url_base from corehq.apps.celery import periodic_task, task +from corehq.apps.es.exceptions import ESError from corehq.motech.repeaters.const import UCRRestrictionFFStatus from corehq.util.bounced_email_manager import BouncedEmailManager from corehq.util.email_event_utils import get_bounced_system_emails @@ -315,7 +316,11 @@ def send_stale_case_data_info_to_admins(): return table = StaleCasesTable() - row_data = table.rows + has_error = False + try: + row_data = table.rows + except ESError: + has_error = True num_domains = len(row_data) subject = ( f'Monthly report: {num_domains} domains containing stale ' @@ -328,11 +333,21 @@ def send_stale_case_data_info_to_admins(): 'Please see detailed report below:\n' f'{table.format_as_table(row_data, table.headers)}' ) + if has_error: + message += ( + '\nPlease note that an error occurred while compiling the report ' + 'and so the data given may only be partial.' + ) else: message = ( 'No domains were found containing case data older than ' f'{table.STALE_DATE_THRESHOLD_DAYS} days.' ) + if has_error: + message += ( + '\nPlease note that an error occurred while compiling the report ' + 'and so there may be missing data that was not compiled.' + ) send_mail_async.delay( subject, message, [settings.SOLUTIONS_AES_EMAIL] ) From 9cf7aa65db50f38c445ec0985c39dd3b0a3b3884 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 11:55:34 +0200 Subject: [PATCH 07/17] process to csv file instead of datatable --- corehq/apps/hqadmin/reports.py | 25 ------------------------- corehq/apps/hqwebapp/tasks.py | 15 +++++++++++++-- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index b36fee0534a2..d1bf72c2388d 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -585,28 +585,3 @@ def _get_domains(self): .filter(is_active=True) .values_list('subscriber__domain', flat=True) )) - - @staticmethod - def format_as_table(row_data, data_tables_header): - """ - Formats a given set of `row_data` with `headers` into a str formatted table that looks as follows: - - ``` - Header_1 | Header_2 | etc... - --------------------------------- - Alice | 25 | New York - Bob | 30 | Los Angeles - Charlie | 35 | Chicago - ``` - """ - # Calculate width of each col - headers = [str(header.html) for header in data_tables_header] - col_widths = [max(len(str(row[i])) for row in [headers] + row_data) for i in range(len(headers))] - row_format = " | ".join(f"{{:<{w}}}" for w in col_widths) - - lines = [] - lines.append(row_format.format(*headers)) - lines.append("-" * (sum(col_widths) + 3 * (len(headers) - 1))) # Divider - for row in row_data: - lines.append(row_format.format(*row)) - return '\n'.join(lines) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index b1949f579b66..0b9aa1693c89 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -1,3 +1,6 @@ +import csv +from io import StringIO + from smtplib import SMTPDataError from urllib.parse import urlencode, urljoin @@ -326,18 +329,23 @@ def send_stale_case_data_info_to_admins(): f'Monthly report: {num_domains} domains containing stale ' f'case data (older than {table.STALE_DATE_THRESHOLD_DAYS} days)' ) + csv_file = None if num_domains: message = ( f'We have identified {num_domains} domains containing stale ' f'case data older than {table.STALE_DATE_THRESHOLD_DAYS} days.\n' 'Please see detailed report below:\n' - f'{table.format_as_table(row_data, table.headers)}' + 'Please see detailed CSV report attached to this email.' ) if has_error: message += ( '\nPlease note that an error occurred while compiling the report ' 'and so the data given may only be partial.' ) + csv_file = StringIO() + writer = csv.writer(csv_file) + writer.writerow(table.headers) + writer.writerows(row_data) else: message = ( 'No domains were found containing case data older than ' @@ -349,5 +357,8 @@ def send_stale_case_data_info_to_admins(): 'and so there may be missing data that was not compiled.' ) send_mail_async.delay( - subject, message, [settings.SOLUTIONS_AES_EMAIL] + subject, + message, + recipient_list=[settings.SOLUTIONS_AES_EMAIL], + filename=csv_file ) From 3d6772073b449d911d3871e396cf35156e54dfb2 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 11:56:12 +0200 Subject: [PATCH 08/17] cache row data --- corehq/apps/hqadmin/reports.py | 15 ++++++++++----- corehq/apps/hqwebapp/tasks.py | 5 ++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index d1bf72c2388d..a0066faae731 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -511,6 +511,10 @@ class StaleCasesTable: BACKOFF_AMOUNT = 30 MAX_BACKOFF_COUNT = 2 + + def __init__(self): + self._rows = None + @property def headers(self): return DataTablesHeader( @@ -520,11 +524,12 @@ def headers(self): @property def rows(self): - rows = [] - case_count_by_domain = self._aggregate_case_count_data() - for domain, case_count in case_count_by_domain.items(): - rows.append([domain, case_count]) - return rows + if self._rows is None: + self._rows = [] + case_count_by_domain = self._aggregate_case_count_data() + for domain, case_count in case_count_by_domain.items(): + self._rows.append([domain, case_count]) + return self._rows def _aggregate_case_count_data(self): end_date = datetime.now() - timedelta(days=self.STALE_DATE_THRESHOLD_DAYS) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index 0b9aa1693c89..1aeb48269a90 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -321,10 +321,9 @@ def send_stale_case_data_info_to_admins(): table = StaleCasesTable() has_error = False try: - row_data = table.rows + num_domains = len(table.rows) except ESError: has_error = True - num_domains = len(row_data) subject = ( f'Monthly report: {num_domains} domains containing stale ' f'case data (older than {table.STALE_DATE_THRESHOLD_DAYS} days)' @@ -345,7 +344,7 @@ def send_stale_case_data_info_to_admins(): csv_file = StringIO() writer = csv.writer(csv_file) writer.writerow(table.headers) - writer.writerows(row_data) + writer.writerows(table.rows) else: message = ( 'No domains were found containing case data older than ' From 19ddf0b7e45d2a1d326a8ba5dd26a55ee934d6fa Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 11:56:34 +0200 Subject: [PATCH 09/17] only execute for production --- corehq/apps/hqwebapp/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index 1aeb48269a90..f42f8183c9e9 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -315,7 +315,7 @@ def send_domain_ucr_data_info_to_admins(): def send_stale_case_data_info_to_admins(): from corehq.apps.hqadmin.reports import StaleCasesTable - if not settings.SOLUTIONS_AES_EMAIL: + if not settings.SOLUTIONS_AES_EMAIL or settings.SERVER_ENVIRONMENT != 'production': return table = StaleCasesTable() From 426bf32fd8ce1d9d1c1860cc1e7a9161c94894f8 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 12:07:32 +0200 Subject: [PATCH 10/17] update unit tests --- corehq/apps/hqadmin/tests/test_reports.py | 29 +++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/corehq/apps/hqadmin/tests/test_reports.py b/corehq/apps/hqadmin/tests/test_reports.py index 625d600faafd..8c76e3ed85b2 100644 --- a/corehq/apps/hqadmin/tests/test_reports.py +++ b/corehq/apps/hqadmin/tests/test_reports.py @@ -92,38 +92,27 @@ def setUpClass(cls): super().setUpClass() cases = [ cls._get_case(days_back=0), - cls._get_case(days_back=366), - cls._get_case(days_back=380, is_closed=True), cls._get_case(days_back=365), + cls._get_case(days_back=380, is_closed=True), + cls._get_case(days_back=365 * 21), ] case_search_adapter.bulk_index(cases, refresh=True) cls.table = StaleCasesTable() @classmethod def _get_case(cls, days_back, is_closed=False): - server_modified_on = datetime.now() - timedelta(days=days_back) + modified_on = datetime.now() - timedelta(days=days_back) return CommCareCase( case_id=uuid4().hex, domain='test', - server_modified_on=server_modified_on, + modified_on=modified_on, closed=is_closed ) - def test_stale_case_count(self): - res = self.table._stale_case_count() - self.assertEqual(len(res), 1) - self.assertEqual( - (res['test'].key, res['test'].doc_count), - ('test', 2) - ) - - def test_format_as_table(self): - expected_output = ( - 'Domain | Case count\n' - '-------------------\n' - 'test | 2 ' - ) + @patch.object(StaleCasesTable, '_get_domains') + def test_get_rows(self, _get_domains_mock): + _get_domains_mock.return_value = ['test'] self.assertEqual( - self.table.format_as_table(self.table.rows, self.table.headers), - expected_output + self.table.rows, + [['test', 1]] ) From db65b562b22185870d650fa1348ed536c18b45e8 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 12:11:37 +0200 Subject: [PATCH 11/17] make stop point dynamic --- corehq/apps/hqadmin/reports.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index a0066faae731..3f672fddc361 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -505,7 +505,7 @@ def rows(self): class StaleCasesTable: - STOP_YEAR = datetime(2005, 1, 1) + STOP_POINT_DAYS_AGO = 365 * 20 AGG_DATE_RANGE = 150 STALE_DATE_THRESHOLD_DAYS = 365 @@ -514,6 +514,7 @@ class StaleCasesTable: def __init__(self): self._rows = None + self.stop_date = datetime.now() - timedelta(days=self.STOP_POINT_DAYS_AGO) @property def headers(self): @@ -561,7 +562,7 @@ def _aggregate_case_count_data(self): curr_agg_date_range = self.AGG_DATE_RANGE self._merge_agg_data(agg_res, query_res) end_date = start_date - if end_date <= self.STOP_YEAR: + if end_date <= self.stop_date: break return agg_res From 255e832f3ddd387a5e87b3effdce1992f9dd330a Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 12:11:53 +0200 Subject: [PATCH 12/17] correctly apply backoff amount --- corehq/apps/hqadmin/reports.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index 3f672fddc361..1887908e3780 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -509,7 +509,7 @@ class StaleCasesTable: AGG_DATE_RANGE = 150 STALE_DATE_THRESHOLD_DAYS = 365 - BACKOFF_AMOUNT = 30 + BACKOFF_AMOUNT_DAYS = 30 MAX_BACKOFF_COUNT = 2 def __init__(self): @@ -545,7 +545,7 @@ def _aggregate_case_count_data(self): except ESError as e: curr_backoff_count += 1 if curr_backoff_count <= self.MAX_BACKOFF_COUNT: - curr_agg_date_range -= self.AGG_DATE_RANGE + curr_agg_date_range -= self.BACKOFF_AMOUNT_DAYS curr_backoff_count += 1 else: notify_exception( From adf92e0934d39ab60da300e19544e7109d94bc77 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 12:22:25 +0200 Subject: [PATCH 13/17] remove redundant text --- corehq/apps/hqwebapp/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index f42f8183c9e9..d23a2e6d33c1 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -333,7 +333,6 @@ def send_stale_case_data_info_to_admins(): message = ( f'We have identified {num_domains} domains containing stale ' f'case data older than {table.STALE_DATE_THRESHOLD_DAYS} days.\n' - 'Please see detailed report below:\n' 'Please see detailed CSV report attached to this email.' ) if has_error: From d1eb95d37c12cb9166863fc86e5cb77f924911c8 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Thu, 12 Dec 2024 13:09:19 +0200 Subject: [PATCH 14/17] correctly send attachment with email --- corehq/apps/hqwebapp/tasks.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index d23a2e6d33c1..95054b9173f3 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -9,6 +9,7 @@ from django.core.mail.message import EmailMessage from django.core.management import call_command from django.urls import reverse +from django.template.defaultfilters import linebreaksbr from django.utils.translation import gettext as _ from celery.exceptions import MaxRetriesExceededError @@ -314,6 +315,7 @@ def send_domain_ucr_data_info_to_admins(): @periodic_task(run_every=crontab(minute=0, hour=1, day_of_month=1)) def send_stale_case_data_info_to_admins(): from corehq.apps.hqadmin.reports import StaleCasesTable + from corehq.apps.hqwebapp.tasks import send_html_email_async if not settings.SOLUTIONS_AES_EMAIL or settings.SERVER_ENVIRONMENT != 'production': return @@ -354,9 +356,9 @@ def send_stale_case_data_info_to_admins(): '\nPlease note that an error occurred while compiling the report ' 'and so there may be missing data that was not compiled.' ) - send_mail_async.delay( + send_html_email_async.delay( subject, - message, - recipient_list=[settings.SOLUTIONS_AES_EMAIL], - filename=csv_file + recipient=settings.SOLUTIONS_AES_EMAIL, + html_content=linebreaksbr(message), + file_attachments=[csv_file] ) From ee92b43c1856f7312e6d64c1fdf6d205104a21f0 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Fri, 13 Dec 2024 10:34:29 +0200 Subject: [PATCH 15/17] only show error message if relevant --- corehq/apps/hqwebapp/tasks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index 95054b9173f3..1ae76b44a75f 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -346,16 +346,16 @@ def send_stale_case_data_info_to_admins(): writer = csv.writer(csv_file) writer.writerow(table.headers) writer.writerows(table.rows) + elif has_error: + message = ( + '\nPlease note that an error occurred while compiling the report ' + 'and so there may be missing data that was not compiled.' + ) else: message = ( 'No domains were found containing case data older than ' f'{table.STALE_DATE_THRESHOLD_DAYS} days.' ) - if has_error: - message += ( - '\nPlease note that an error occurred while compiling the report ' - 'and so there may be missing data that was not compiled.' - ) send_html_email_async.delay( subject, recipient=settings.SOLUTIONS_AES_EMAIL, From 7ec4e5b3a68e7c647c8dd70613597a1667d59b8b Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Fri, 13 Dec 2024 10:35:00 +0200 Subject: [PATCH 16/17] minor fixes to query logic --- corehq/apps/hqadmin/reports.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index 1887908e3780..7e2b8216b49f 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -539,14 +539,13 @@ def _aggregate_case_count_data(self): curr_agg_date_range = self.AGG_DATE_RANGE domains = self._get_domains() while (True): - start_date = end_date - timedelta(days=self.AGG_DATE_RANGE) + start_date = end_date - timedelta(days=curr_agg_date_range) try: query_res = self._stale_case_count_in_date_range(domains, start_date, end_date) except ESError as e: curr_backoff_count += 1 if curr_backoff_count <= self.MAX_BACKOFF_COUNT: curr_agg_date_range -= self.BACKOFF_AMOUNT_DAYS - curr_backoff_count += 1 else: notify_exception( None, @@ -558,12 +557,13 @@ def _aggregate_case_count_data(self): } ) raise ESError() - curr_backoff_count = 0 - curr_agg_date_range = self.AGG_DATE_RANGE - self._merge_agg_data(agg_res, query_res) - end_date = start_date - if end_date <= self.stop_date: - break + else: + curr_backoff_count = 0 + curr_agg_date_range = self.AGG_DATE_RANGE + self._merge_agg_data(agg_res, query_res) + end_date = start_date + if end_date <= self.stop_date: + break return agg_res def _merge_agg_data(self, agg_res, query_res): From 25d9dc5cffb9417c6873aaf7a86cf7afb715e134 Mon Sep 17 00:00:00 2001 From: Zandre Engelbrecht Date: Fri, 13 Dec 2024 12:40:46 +0200 Subject: [PATCH 17/17] use defaultdict --- corehq/apps/hqadmin/reports.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/corehq/apps/hqadmin/reports.py b/corehq/apps/hqadmin/reports.py index 7e2b8216b49f..240ee9f6ecea 100644 --- a/corehq/apps/hqadmin/reports.py +++ b/corehq/apps/hqadmin/reports.py @@ -1,3 +1,4 @@ +from collections import defaultdict from datetime import datetime, timedelta from django.urls import reverse @@ -534,7 +535,7 @@ def rows(self): def _aggregate_case_count_data(self): end_date = datetime.now() - timedelta(days=self.STALE_DATE_THRESHOLD_DAYS) - agg_res = {} + agg_res = defaultdict(lambda: 0) curr_backoff_count = 0 curr_agg_date_range = self.AGG_DATE_RANGE domains = self._get_domains() @@ -568,8 +569,6 @@ def _aggregate_case_count_data(self): def _merge_agg_data(self, agg_res, query_res): for domain, case_count in query_res.items(): - if domain not in agg_res: - agg_res[domain] = 0 agg_res[domain] += case_count def _stale_case_count_in_date_range(self, domains, start_date, end_date):