From a022fb49df3c7fcab146e0bb22b362453ca1c226 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Fri, 21 May 2021 19:51:03 -0700 Subject: [PATCH 01/26] Refactor backfill mgmt cmds, pass common options incl date range, site --- figures/management/commands/__init__.py | 55 ++++++++++++ .../commands/backfill_daily_metrics.py | 58 ++++++++++++ ...nt_data.py => backfill_enrollment_data.py} | 25 ++---- ...metrics.py => backfill_monthly_metrics.py} | 19 +--- .../commands/populate_figures_metrics.py | 88 ------------------- 5 files changed, 125 insertions(+), 120 deletions(-) create mode 100644 figures/management/commands/backfill_daily_metrics.py rename figures/management/commands/{update_figures_enrollment_data.py => backfill_enrollment_data.py} (68%) rename figures/management/commands/{backfill_figures_metrics.py => backfill_monthly_metrics.py} (72%) delete mode 100644 figures/management/commands/populate_figures_metrics.py diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index e69de29b..adfc9205 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -0,0 +1,55 @@ +""" +Management commands for Figures. +""" +import datetime +from textwrap import dedent + +from django.core.management.base import BaseCommand + + +class BaseBackfillCommand(BaseCommand): + '''Base class for Figures backfill management commands with common options. + ''' + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + ''' + ''' + parser.add_argument( + '--site', + help='backfill a specific site. provide id or domain name', + default=None + ) + parser.add_argument( + '--date_start', + help='date for which we start backfilling data, in yyyy-mm-dd format', + action='store_true' + ) + parser.add_argument( + '--date_end', + help='date for which we end backfilling data, in yyyy-mm-dd format', + action='store_true' + ) + parser.add_argument( + '--no-delay', + action='store_true', + default=False, + help='Disable the celery "delay" directive' + ) + parser.add_argument( + '--overwrite', + action='store_true', + default=False, + help='Overwrite metrics records if they exist for the given date' + ) + # parser.add_argument( + # '--ignore_exceptions', + # action='store_true', + # default=False, + # help='Print exceptions if thrown, and continue processing' + # ) + + def print_exc(metrics_type, date, exc_message): + print("Could not populate {} for {}. Exception was {}".format( + metrics_type, date, exc_message) + ) diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py new file mode 100644 index 00000000..ffa3b7ae --- /dev/null +++ b/figures/management/commands/backfill_daily_metrics.py @@ -0,0 +1,58 @@ +'''Management command to manually populate course metrics + +see the model ``edx_figures.models.CourseDailyMetrics`` +''' + +from __future__ import print_function + +from __future__ import absolute_import + +import datetime +from dateutil.rrule import rrule, DAILY + +from figures.tasks import ( + populate_daily_metrics, + experimental_populate_daily_metrics +) + +from . import BaseBackfillCommand + + +class Command(BaseBackfillCommand): + '''Populate Figures daily metrics models + ''' + def handle(self, *args, **options): + ''' + Note the '# pragma: no cover' lines below. This is because we are not + yet mocking celery for test coverage + ''' + date_start = options['date_start'] or datetime.date.today() + date_end = options['date_end'] or datetime.date.today() + + experimental = options['experimental'] + options.pop('experimental') + + print('BEGIN RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) + + # populate daily metrics one day at a time for date range + for dt in rrule(DAILY, dtstart=date_start, until=date_end): + + print('BEGIN: Backfill Figures daily metrics metrics for: '.format(dt)) + + kwargs = dict( + date_for=dt, + force_update=options['overwrite'], + ) + + metrics_func = experimental_populate_daily_metrics if experimental else populate_daily_metrics + # try: + metrics_func(**kwargs) if options['no_delay'] else metrics_func.delay(**kwargs) # pragma: no cover + # except Exception as e: # pylint: disable=bare-except + # if options['ignore_exceptions']: + # self.print_exc("daily", dt, e.message) + # else: + # raise + + print('END: Backfill Figures daily metrics metrics for: '.format(options['date'] or 'today')) + + print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) diff --git a/figures/management/commands/update_figures_enrollment_data.py b/figures/management/commands/backfill_enrollment_data.py similarity index 68% rename from figures/management/commands/update_figures_enrollment_data.py rename to figures/management/commands/backfill_enrollment_data.py index 9ba973fb..0e1c72cb 100644 --- a/figures/management/commands/update_figures_enrollment_data.py +++ b/figures/management/commands/backfill_enrollment_data.py @@ -6,10 +6,13 @@ from __future__ import print_function from __future__ import absolute_import from textwrap import dedent + from django.contrib.sites.models import Site -from django.core.management.base import BaseCommand -from figures.tasks import update_enrollment_data + from figures.sites import get_sites +from figures.tasks import update_enrollment_data + +from . import BaseBackfillCommand def get_site(identifier): @@ -23,23 +26,13 @@ def get_site(identifier): return Site.objects.get(**filter_arg) -class Command(BaseCommand): - """Populate Figures metrics models - - Improvements +class Command(BaseBackfillCommand): + """Backfill Figures EnrollmentData model. """ help = dedent(__doc__).strip() - def add_arguments(self, parser): - parser.add_argument('--no-delay', - action='store_true', - default=False, - help='Disable the celery "delay" directive') - parser.add_argument('--site', - help='backfill a specific site. provide id or domain name') - def handle(self, *args, **options): - print('BEGIN: Update Figures EnrollmentData') + print('BEGIN: Backfill Figures EnrollmentData') if options['site']: sites = [get_site(options['site'])] @@ -52,4 +45,4 @@ def handle(self, *args, **options): else: update_enrollment_data.delay(site_id=site.id) # pragma: no cover - print('DONE: Update Figures EnrollmentData') + print('DONE: Backfill Figures EnrollmentData') diff --git a/figures/management/commands/backfill_figures_metrics.py b/figures/management/commands/backfill_monthly_metrics.py similarity index 72% rename from figures/management/commands/backfill_figures_metrics.py rename to figures/management/commands/backfill_monthly_metrics.py index 5261c48e..3fb6d36f 100644 --- a/figures/management/commands/backfill_figures_metrics.py +++ b/figures/management/commands/backfill_monthly_metrics.py @@ -5,7 +5,6 @@ from __future__ import print_function from __future__ import absolute_import -from textwrap import dedent from django.contrib.sites.models import Site from django.core.management.base import BaseCommand @@ -44,22 +43,10 @@ def backfill_site(site, overwrite): class Command(BaseCommand): - """Populate Figures metrics models - - Improvements + """Backfill Figures monthly metrics models. """ - help = dedent(__doc__).strip() - - def add_arguments(self, parser): - parser.add_argument('--overwrite', - action='store_true', - default=False, - help='overwrite existing data in SiteMonthlyMetrics') - parser.add_argument('--site', - help='backfill a specific site. provide id or domain name') - def handle(self, *args, **options): - print('BEGIN: Backfill Figures Metrics') + print('BEGIN: Backfill Figures Monthly Metrics') if options['site']: sites = [get_site(options['site'])] @@ -68,4 +55,4 @@ def handle(self, *args, **options): for site in sites: backfill_site(site, overwrite=options['overwrite']) - print('DONE: Backfill Figures Metrics') + print('END: Backfill Figures Metrics') diff --git a/figures/management/commands/populate_figures_metrics.py b/figures/management/commands/populate_figures_metrics.py deleted file mode 100644 index ec61cba0..00000000 --- a/figures/management/commands/populate_figures_metrics.py +++ /dev/null @@ -1,88 +0,0 @@ -'''Management command to manually populate course metrics - -see the model ``edx_figures.models.CourseDailyMetrics`` -''' - -from __future__ import print_function - -from __future__ import absolute_import -from textwrap import dedent - -from django.core.management.base import BaseCommand - -from figures.tasks import ( - populate_daily_metrics, - experimental_populate_daily_metrics, - populate_all_mau, -) - - -class Command(BaseCommand): - '''Populate Figures metrics models - ''' - help = dedent(__doc__).strip() - - def add_arguments(self, parser): - ''' - ''' - - parser.add_argument('--date', - help='date for which we are retrieving data in yyyy-mm-dd format') - parser.add_argument('--no-delay', - action='store_true', - default=False, - help='Disable the celery "delay" directive') - parser.add_argument('--force-update', - action='store_true', - default=False, - help='Overwrite metrics records if they exist for the given date') - parser.add_argument('--experimental', - action='store_true', - default=False, - help=('Run with Celery workflows (Warning: This is still under' + - ' development and likely to get stuck/hung jobs')) - parser.add_argument('--mau', - action='store_true', - default=False, - help='Run just the MAU pipeline') - - def handle(self, *args, **options): - ''' - Note the '# pragma: no cover' lines below. This is because we are not - yet mocking celery for test coverage - - The 'mau' conditional check in this method is a quick hack to run the - MAU task from this command. What we probably want is a 'figures_cli' - command with subcommands. - ''' - print('populating Figures metrics...') - - kwargs = dict( - date_for=options['date'], - force_update=options['force_update'], - ) - - if options['mau']: - if options['no_delay']: - populate_all_mau() - else: - populate_all_mau.delay() # pragma: no cover - else: - experimental = options['experimental'] - options.pop('experimental') - - if experimental: - if options['no_delay']: - experimental_populate_daily_metrics(**kwargs) - else: - experimental_populate_daily_metrics.delay(**kwargs) # pragma: no cover - else: - if options['no_delay']: - populate_daily_metrics(**kwargs) - else: - populate_daily_metrics.delay(**kwargs) # pragma: no cover - - # TODO: improve this message to say 'today' when options['date'] is None - print('Management command populate_figures_metrics complete. date_for: {}'.format( - options['date'])) - print('Done.') From cd327da4a94ffb9a1a8f9393ab6c287a01edc133 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 24 May 2021 14:21:18 -0700 Subject: [PATCH 02/26] Backfill mgmt cmds base class method to get single or all sites --- figures/management/commands/__init__.py | 22 ++++++++++++---- .../commands/backfill_daily_metrics.py | 12 +++++++-- .../commands/backfill_enrollment_data.py | 21 ++------------- .../commands/backfill_monthly_metrics.py | 26 +++++-------------- figures/tasks.py | 3 +-- 5 files changed, 36 insertions(+), 48 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index adfc9205..a578c64e 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -1,23 +1,35 @@ """ Management commands for Figures. """ -import datetime -from textwrap import dedent - +from django.contrib.sites.models import Site from django.core.management.base import BaseCommand +from figures.sites import get_sites + class BaseBackfillCommand(BaseCommand): '''Base class for Figures backfill management commands with common options. ''' - help = dedent(__doc__).strip() + def get_sites(identifier=None): + """Quick-n-dirty function to let the caller choose the site id or domain. + If no identifier is passed, return all available Sites. + Let the 'get' fail if record can't be found from the identifier. + """ + if not identifier: + return get_sites() + else: + try: + filter_arg = dict(pk=int(identifier)) + except ValueError: + filter_arg = dict(domain=identifier) + return Site.objects.filter(**filter_arg) def add_arguments(self, parser): ''' ''' parser.add_argument( '--site', - help='backfill a specific site. provide id or domain name', + help='backfill a specific site. provide numeric id or domain name', default=None ) parser.add_argument( diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index ffa3b7ae..c47afc39 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -9,6 +9,7 @@ import datetime from dateutil.rrule import rrule, DAILY +from textwrap import dedent from figures.tasks import ( populate_daily_metrics, @@ -21,6 +22,9 @@ class Command(BaseBackfillCommand): '''Populate Figures daily metrics models ''' + + help = dedent(__doc__).strip() + def handle(self, *args, **options): ''' Note the '# pragma: no cover' lines below. This is because we are not @@ -40,13 +44,17 @@ def handle(self, *args, **options): print('BEGIN: Backfill Figures daily metrics metrics for: '.format(dt)) kwargs = dict( + sites=self.get_sites(options['site']), date_for=dt, - force_update=options['overwrite'], + force_update=options['overwrite'] ) metrics_func = experimental_populate_daily_metrics if experimental else populate_daily_metrics # try: - metrics_func(**kwargs) if options['no_delay'] else metrics_func.delay(**kwargs) # pragma: no cover + if options['no_delay']: + metrics_func(**kwargs) + else: + metrics_func.delay(**kwargs) # pragma: no cover # except Exception as e: # pylint: disable=bare-except # if options['ignore_exceptions']: # self.print_exc("daily", dt, e.message) diff --git a/figures/management/commands/backfill_enrollment_data.py b/figures/management/commands/backfill_enrollment_data.py index 0e1c72cb..cda098d1 100644 --- a/figures/management/commands/backfill_enrollment_data.py +++ b/figures/management/commands/backfill_enrollment_data.py @@ -5,27 +5,14 @@ """ from __future__ import print_function from __future__ import absolute_import -from textwrap import dedent -from django.contrib.sites.models import Site +from textwrap import dedent -from figures.sites import get_sites from figures.tasks import update_enrollment_data from . import BaseBackfillCommand -def get_site(identifier): - """Quick-n-dirty function to let the caller choose the site id or domain - Let the 'get' fail if record can't be found from the identifier - """ - try: - filter_arg = dict(pk=int(identifier)) - except ValueError: - filter_arg = dict(domain=identifier) - return Site.objects.get(**filter_arg) - - class Command(BaseBackfillCommand): """Backfill Figures EnrollmentData model. """ @@ -34,11 +21,7 @@ class Command(BaseBackfillCommand): def handle(self, *args, **options): print('BEGIN: Backfill Figures EnrollmentData') - if options['site']: - sites = [get_site(options['site'])] - else: - sites = get_sites() - for site in sites: + for site in self.get_sites(options['site']): print('Updating EnrollmentData for site "{}"'.format(site.domain)) if options['no_delay']: update_enrollment_data(site_id=site.id) diff --git a/figures/management/commands/backfill_monthly_metrics.py b/figures/management/commands/backfill_monthly_metrics.py index 3fb6d36f..5a7cfec1 100644 --- a/figures/management/commands/backfill_monthly_metrics.py +++ b/figures/management/commands/backfill_monthly_metrics.py @@ -3,25 +3,13 @@ """ from __future__ import print_function - from __future__ import absolute_import -from django.contrib.sites.models import Site -from django.core.management.base import BaseCommand +from textwrap import dedent from figures.backfill import backfill_monthly_metrics_for_site -from figures.sites import get_sites - -def get_site(identifier): - """Quick-n-dirty function to let the caller choose the site id or domain - Let the 'get' fail if record can't be found from the identifier - """ - try: - filter_arg = dict(pk=int(identifier)) - except ValueError: - filter_arg = dict(domain=identifier) - return Site.objects.get(**filter_arg) +from . import BaseBackfillCommand def backfill_site(site, overwrite): @@ -42,17 +30,15 @@ def backfill_site(site, overwrite): print('No student modules for site "{}"'.format(site.domain)) -class Command(BaseCommand): +class Command(BaseBackfillCommand): """Backfill Figures monthly metrics models. """ + help = dedent(__doc__).strip() + def handle(self, *args, **options): print('BEGIN: Backfill Figures Monthly Metrics') - if options['site']: - sites = [get_site(options['site'])] - else: - sites = get_sites() - for site in sites: + for site in self.get_sites(options['site']): backfill_site(site, overwrite=options['overwrite']) print('END: Backfill Figures Metrics') diff --git a/figures/tasks.py b/figures/tasks.py index 29b735bd..76762d32 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -153,7 +153,7 @@ def update_enrollment_data(site_id, **_kwargs): @shared_task -def populate_daily_metrics(date_for=None, force_update=False): +def populate_daily_metrics(sites, date_for=None, force_update=False): """Runs Figures daily metrics collection This is a top level Celery task run every 24 hours to collect metrics. @@ -197,7 +197,6 @@ def populate_daily_metrics(date_for=None, force_update=False): date_for = today do_update_enrollment_data = False if date_for < today else True - sites = get_sites() sites_count = sites.count() # This is our task entry log message From deadd6c7d6962b3862827722c3991fa43bfb9d17 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 24 May 2021 14:40:34 -0700 Subject: [PATCH 03/26] BackfillCommand fix date_start, date_end args. Revert --experimental for daily --- figures/management/commands/__init__.py | 2 -- figures/management/commands/backfill_daily_metrics.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index a578c64e..f7da2a5d 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -35,12 +35,10 @@ def add_arguments(self, parser): parser.add_argument( '--date_start', help='date for which we start backfilling data, in yyyy-mm-dd format', - action='store_true' ) parser.add_argument( '--date_end', help='date for which we end backfilling data, in yyyy-mm-dd format', - action='store_true' ) parser.add_argument( '--no-delay', diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index c47afc39..787719e3 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -25,6 +25,16 @@ class Command(BaseBackfillCommand): help = dedent(__doc__).strip() + def add_arguments(self, parser): + parser.add_argument( + '--experimental', + action='store_true', + default=False, + help=('Run with Celery workflows (Warning: This is still under' + ' development and likely to get stuck/hung jobs') + ) + super(Command, self).add_arguments(parser) + def handle(self, *args, **options): ''' Note the '# pragma: no cover' lines below. This is because we are not From b2e52aac29dd72be305482c11277100343bfaa49 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 24 May 2021 16:46:39 -0700 Subject: [PATCH 04/26] BackfillCommand find dates w/ dateutil.parser, default to today --- figures/management/commands/__init__.py | 11 +++++++++++ figures/management/commands/backfill_daily_metrics.py | 5 ++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index f7da2a5d..0fa127f4 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -1,6 +1,9 @@ """ Management commands for Figures. """ +from datetime import datetime +from dateutil import parser + from django.contrib.sites.models import Site from django.core.management.base import BaseCommand @@ -24,6 +27,14 @@ def get_sites(identifier=None): filter_arg = dict(domain=identifier) return Site.objects.filter(**filter_arg) + def get_date(date_str=None): + '''Return a datetime.date from a string or NoneType. + ''' + try: + return parser.parse(date_str).date + except AttributeError: + return datetime.today() + def add_arguments(self, parser): ''' ''' diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 787719e3..0fb84a55 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -40,11 +40,10 @@ def handle(self, *args, **options): Note the '# pragma: no cover' lines below. This is because we are not yet mocking celery for test coverage ''' - date_start = options['date_start'] or datetime.date.today() - date_end = options['date_end'] or datetime.date.today() + date_start = self.get_date(options['date_start']) + date_end = self.get_date(options['date_end']) experimental = options['experimental'] - options.pop('experimental') print('BEGIN RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) From f7a7f9286293f9de46618f48dff1c4a042593af2 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 24 May 2021 16:49:38 -0700 Subject: [PATCH 05/26] BackfillCommand fix get_date() and get_sites() methods --- figures/management/commands/__init__.py | 12 ++++++------ .../management/commands/backfill_daily_metrics.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 0fa127f4..c7a9ec06 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -13,7 +13,7 @@ class BaseBackfillCommand(BaseCommand): '''Base class for Figures backfill management commands with common options. ''' - def get_sites(identifier=None): + def get_sites(self, identifier=None): """Quick-n-dirty function to let the caller choose the site id or domain. If no identifier is passed, return all available Sites. Let the 'get' fail if record can't be found from the identifier. @@ -27,13 +27,13 @@ def get_sites(identifier=None): filter_arg = dict(domain=identifier) return Site.objects.filter(**filter_arg) - def get_date(date_str=None): + def get_date(self, date_str=None): '''Return a datetime.date from a string or NoneType. ''' try: - return parser.parse(date_str).date + return parser.parse(date_str).date() except AttributeError: - return datetime.today() + return datetime.today().date() def add_arguments(self, parser): ''' @@ -45,11 +45,11 @@ def add_arguments(self, parser): ) parser.add_argument( '--date_start', - help='date for which we start backfilling data, in yyyy-mm-dd format', + help='date for which we start backfilling data', ) parser.add_argument( '--date_end', - help='date for which we end backfilling data, in yyyy-mm-dd format', + help='date for which we end backfilling data', ) parser.add_argument( '--no-delay', diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 0fb84a55..528b22c7 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -50,7 +50,7 @@ def handle(self, *args, **options): # populate daily metrics one day at a time for date range for dt in rrule(DAILY, dtstart=date_start, until=date_end): - print('BEGIN: Backfill Figures daily metrics metrics for: '.format(dt)) + print('BEGIN: Backfill Figures daily metrics metrics for: {}'.format(dt)) kwargs = dict( sites=self.get_sites(options['site']), @@ -70,6 +70,6 @@ def handle(self, *args, **options): # else: # raise - print('END: Backfill Figures daily metrics metrics for: '.format(options['date'] or 'today')) + print('END: Backfill Figures daily metrics metrics for: '.format(dt)) print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) From f383c6311c16dcf68688c9fd7702c0af737ccd6e Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 24 May 2021 22:21:35 -0700 Subject: [PATCH 06/26] BackfillCommand date helpers is_past_date helper method --- figures/backfill.py | 4 ++-- figures/helpers.py | 5 +++++ figures/management/commands/__init__.py | 6 +++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/figures/backfill.py b/figures/backfill.py index b3b0990a..f58d74ea 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -19,8 +19,8 @@ from figures.models import EnrollmentData -def backfill_monthly_metrics_for_site(site, overwrite=False): - """Backfill all historical site metrics for the specified site +def backfill_monthly_metrics_for_site(site, month_for=None, overwrite=False): + """Backfill specified months' historical site metrics for the specified site """ site_sm = get_student_modules_for_site(site) if not site_sm: diff --git a/figures/helpers.py b/figures/helpers.py index 09e11eba..40c7abc1 100644 --- a/figures/helpers.py +++ b/figures/helpers.py @@ -193,6 +193,11 @@ def days_in_month(month_for): return num_days_in_month +def is_past_date(val): + return val.date() < datetime.date.today() + + + # TODO: Consider changing name to 'months_back_iterator' or similar def previous_months_iterator(month_for, months_back): """Iterator returns a year,month tuple for n months including the month_for. diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index c7a9ec06..1a6d57a8 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -2,11 +2,11 @@ Management commands for Figures. """ from datetime import datetime -from dateutil import parser from django.contrib.sites.models import Site from django.core.management.base import BaseCommand +from figures import helpers from figures.sites import get_sites @@ -31,8 +31,8 @@ def get_date(self, date_str=None): '''Return a datetime.date from a string or NoneType. ''' try: - return parser.parse(date_str).date() - except AttributeError: + return helpers.as_date(date_str) + except TypeError: return datetime.today().date() def add_arguments(self, parser): From f89f55952e31b8cf67f365aa3c1288111c576b8f Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 25 May 2021 12:12:47 -0700 Subject: [PATCH 07/26] Docstring clarifications, logging and TODOs for BaseBackfillCommand --- figures/management/commands/__init__.py | 1 + .../management/commands/backfill_daily_metrics.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 1a6d57a8..6d155459 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -43,6 +43,7 @@ def add_arguments(self, parser): help='backfill a specific site. provide numeric id or domain name', default=None ) + # TODO: handle date start later than date end parser.add_argument( '--date_start', help='date for which we start backfilling data', diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 528b22c7..60719010 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -1,6 +1,6 @@ -'''Management command to manually populate course metrics +'''Management command to manually populate course and site daily metrics -see the model ``edx_figures.models.CourseDailyMetrics`` +see the models ``figures.models.CourseDailyMetrics`` and ``figures.models.SiteDailyMetrics`` ''' from __future__ import print_function @@ -20,7 +20,11 @@ class Command(BaseBackfillCommand): - '''Populate Figures daily metrics models + '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``). + + Note that correctly populating cumulative user and course count for ``SiteDailyMetrics`` + relies on running this sequentially forward from the first date for which StudentModule records + are present. ''' help = dedent(__doc__).strip() @@ -70,6 +74,6 @@ def handle(self, *args, **options): # else: # raise - print('END: Backfill Figures daily metrics metrics for: '.format(dt)) + print('END: Backfill Figures daily metrics metrics for: {}'.format(dt)) print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) From b9309ba3631dc002bd3d8c504cb3d6ed1a43feea Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 25 May 2021 12:14:52 -0700 Subject: [PATCH 08/26] CourseDailyMetricsExtractor don't calculate avg progress for past dates Progress values are not accurate for past dates as we do not use StudentModuleHistory or Persistent Grades, but always check the current value of StudentModule, which will give values accurate only for the time they are retrieved from the db. --- figures/helpers.py | 3 +- figures/pipeline/course_daily_metrics.py | 35 ++++++++++++++++-------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/figures/helpers.py b/figures/helpers.py index 40c7abc1..c57bb172 100644 --- a/figures/helpers.py +++ b/figures/helpers.py @@ -194,8 +194,7 @@ def days_in_month(month_for): def is_past_date(val): - return val.date() < datetime.date.today() - + return as_date(val) < datetime.date.today() # TODO: Consider changing name to 'months_back_iterator' or similar diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index 97943dad..2d77dc79 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -23,7 +23,7 @@ CourseOverview, GeneratedCertificate, StudentModule) -from figures.helpers import as_course_key, as_datetime, next_day +from figures.helpers import as_course_key, as_datetime, is_past_date, next_day import figures.metrics from figures.models import CourseDailyMetrics, PipelineError from figures.pipeline.logger import log_error @@ -263,18 +263,31 @@ def extract(self, course_id, date_for, **_kwargs): data['active_learners_today'] = active_learners_today # Average progress - try: - progress_data = bulk_calculate_course_progress_data(course_id=course_id, - date_for=date_for) - data['average_progress'] = progress_data['average_progress'] - except Exception: # pylint: disable=broad-except - # Broad exception for starters. Refine as we see what gets caught - # Make sure we set the average_progres to None so that upstream - # does not think things are normal + # Progress data cannot be reliable for backfills or for any date prior to today + # without using StudentModuleHistory so we skip getting this data if running + # for a past date (i.e., not during daily update of CDMs), especially since + # it is so expensive to calculate. + # TODO: Reconsider this if we implement either StudentModuleHistory-based queries + # (if so, you will need to add any types you want to StudentModuleHistory.HISTORY_SAVING_TYPES) + # TODO: Reconsider this once we switch to using Persistent Grades + if is_past_date(date_for): data['average_progress'] = None - msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' + msg = ('FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date' ' date_for={date_for}, course_id="{course_id}"') - logger.exception(msg.format(date_for=date_for, course_id=course_id)) + logger.warning(msg.format(date_for=date_for, course_id=course_id)) + else: + try: + progress_data = bulk_calculate_course_progress_data(course_id=course_id, + date_for=date_for) + data['average_progress'] = progress_data['average_progress'] + except Exception: # pylint: disable=broad-except + # Broad exception for starters. Refine as we see what gets caught + # Make sure we set the average_progres to None so that upstream + # does not think things are normal + data['average_progress'] = None + msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' + ' date_for={date_for}, course_id="{course_id}"') + logger.exception(msg.format(date_for=date_for, course_id=course_id)) data['average_days_to_complete'] = get_average_days_to_complete( course_id, date_for,) From 1dc0d2cf64cd02b8beaf1355ca3a696dfba5499f Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 25 May 2021 12:25:29 -0700 Subject: [PATCH 09/26] CousreDailyMetricsLoader don't save a None average_progress to model --- figures/pipeline/course_daily_metrics.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index 2d77dc79..ff573eb8 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -319,18 +319,20 @@ def save_metrics(self, date_for, data): Raises django.core.exceptions.ValidationError if the record fails validation """ + defaults = dict( + enrollment_count=data['enrollment_count'], + active_learners_today=data['active_learners_today'], + average_days_to_complete=int(round(data['average_days_to_complete'])), + num_learners_completed=data['num_learners_completed'], + ) + if data['average_progress'] is not None: + defaults['average_progress'] = str(data['average_progress']) cdm, created = CourseDailyMetrics.objects.update_or_create( course_id=str(self.course_id), site=self.site, date_for=date_for, - defaults=dict( - enrollment_count=data['enrollment_count'], - active_learners_today=data['active_learners_today'], - average_progress=str(data['average_progress']), - average_days_to_complete=int(round(data['average_days_to_complete'])), - num_learners_completed=data['num_learners_completed'], - ) + defaults=defaults ) cdm.clean_fields() return (cdm, created,) From e35e3480da1607b02fa098534c145bbb5bdd0747 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 25 May 2021 13:02:40 -0700 Subject: [PATCH 10/26] CDM Only show one warning log msg when skipping average_progress calc Still output for each course/day to debug log backfill_daily_metrics msg re skip CDM.average_progress for past date Cleanup for backfill commands --- figures/management/commands/__init__.py | 6 ------ figures/management/commands/backfill_daily_metrics.py | 1 - figures/pipeline/course_daily_metrics.py | 4 +++- figures/tasks.py | 7 ++++++- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 6d155459..3c3b5d00 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -64,12 +64,6 @@ def add_arguments(self, parser): default=False, help='Overwrite metrics records if they exist for the given date' ) - # parser.add_argument( - # '--ignore_exceptions', - # action='store_true', - # default=False, - # help='Print exceptions if thrown, and continue processing' - # ) def print_exc(metrics_type, date, exc_message): print("Could not populate {} for {}. Exception was {}".format( diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 60719010..67340ac2 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -7,7 +7,6 @@ from __future__ import absolute_import -import datetime from dateutil.rrule import rrule, DAILY from textwrap import dedent diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index ff573eb8..c48f4df7 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -267,6 +267,8 @@ def extract(self, course_id, date_for, **_kwargs): # without using StudentModuleHistory so we skip getting this data if running # for a past date (i.e., not during daily update of CDMs), especially since # it is so expensive to calculate. + # Note that Avg() applied across null and decimal vals for aggregate average_progress + # will correctly ignore nulls # TODO: Reconsider this if we implement either StudentModuleHistory-based queries # (if so, you will need to add any types you want to StudentModuleHistory.HISTORY_SAVING_TYPES) # TODO: Reconsider this once we switch to using Persistent Grades @@ -274,7 +276,7 @@ def extract(self, course_id, date_for, **_kwargs): data['average_progress'] = None msg = ('FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date' ' date_for={date_for}, course_id="{course_id}"') - logger.warning(msg.format(date_for=date_for, course_id=course_id)) + logger.debug(msg.format(date_for=date_for, course_id=course_id)) else: try: progress_data = bulk_calculate_course_progress_data(course_id=course_id, diff --git a/figures/tasks.py b/figures/tasks.py index 76762d32..26173348 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -20,7 +20,7 @@ from figures.backfill import backfill_enrollment_data_for_site from figures.compat import CourseEnrollment, CourseOverview -from figures.helpers import as_course_key, as_date +from figures.helpers import as_course_key, as_date, is_past_date from figures.log import log_exec_time from figures.pipeline.course_daily_metrics import CourseDailyMetricsLoader from figures.pipeline.site_daily_metrics import SiteDailyMetricsLoader @@ -205,6 +205,11 @@ def populate_daily_metrics(sites, date_for=None, force_update=False): date_for=date_for, site_count=sites_count)) + if is_past_date(date_for): + msg = ('{prefix}:INFO - CourseDailyMetrics.average_progress will not be ' + 'calculated for past date {date_for}') + logger.info(msg.format(date_for=date_for, prefix=FPD_LOG_PREFIX)) + for i, site in enumerate(sites): msg = '{prefix}:SITE:START:{id}:{domain} - Site {i:04d} of {n:04d}' From 2d3e86448686c35919c2d8258f7e260769f3d091 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Thu, 27 May 2021 16:03:19 -0700 Subject: [PATCH 11/26] Add mgmt command to fix bad backfill data in CDM.average_progress --- .../commands/repair_backfilled_progress.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 figures/management/commands/repair_backfilled_progress.py diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_backfilled_progress.py new file mode 100644 index 00000000..2c272b2c --- /dev/null +++ b/figures/management/commands/repair_backfilled_progress.py @@ -0,0 +1,64 @@ +''' +Management command to set as None any CourseDailyMetrics.average_progress values +if produced by a backfill. + +We do this because progress or any other value generated by examining StudentModule +values will not be correct if done for a previous date, until or unless Figures uses +StudentModuleHistory or Persistent Grades to examine db-stored student grade or SM values. +''' + +from __future__ import absolute_import, print_function + +from datetime import timedelta +from textwrap import dedent + +from django.db.models import Count, F, Max, Min + +from figures.models import CourseDailyMetrics + +from . import BaseBackfillCommand + + +class Command(BaseBackfillCommand): + '''Set all CourseDailyMetrics average_progress values to None where CDM was created + more than one day after the date_for value. See module docstring for rationale. + ''' + + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument( + '--dry-run', + action='store_true', + default=False, + help=('Dry run. Output but don\'t save changes') + ) + super(Command, self).add_arguments(parser) + + def handle(self, *args, **options): + ''' + ''' + site = self.get_sites(options['site']), + + print('FIGURES: Repairing backfilled CDM.average_progress for site {}'.format(site)) + + backfills = CourseDailyMetrics.objects.filter( + created__gt=F('date_for') + timedelta(days=2) + ).annotate(courses_count=Count('course_id', distinct=True)) + + num_backfills = backfills.count() + + logmsg = ( + 'FIGURES: Found {count} records from dates between {date_start} and {date_end} from courses {courses}' + 'to update with None values for average_progress'.format( + count=num_backfills, + date_start=backfills.earliest('date_for'), + date_end=backfills.latest('date_for'), + courses=', \n'.join(backfills.values('course_id').distinct()) + ) + ) + print(logmsg) + + if not options['dry_run']: + print('FIGURES: set average_progress to None for {} CourseDailyMetrics records'.format(num_backfills)) + backfills.update(average_progress=None) From f723cf12de1cb4d40dbb63d0878e88ff3f6bc104 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Thu, 27 May 2021 16:11:00 -0700 Subject: [PATCH 12/26] Restrict repair backfill progress to passed Site --- .../commands/repair_backfilled_progress.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_backfilled_progress.py index 2c272b2c..1c11ba0e 100644 --- a/figures/management/commands/repair_backfilled_progress.py +++ b/figures/management/commands/repair_backfilled_progress.py @@ -38,23 +38,30 @@ def add_arguments(self, parser): def handle(self, *args, **options): ''' ''' - site = self.get_sites(options['site']), + site = self.get_sites(options['site'])[0] print('FIGURES: Repairing backfilled CDM.average_progress for site {}'.format(site)) backfills = CourseDailyMetrics.objects.filter( - created__gt=F('date_for') + timedelta(days=2) + site=site, created__gt=F('date_for') + timedelta(days=2), + average_progress__isnull=False ).annotate(courses_count=Count('course_id', distinct=True)) num_backfills = backfills.count() + if num_backfills == 0: + print('FIGURES: Found no CDM records with average_progress to repair.') + return + logmsg = ( - 'FIGURES: Found {count} records from dates between {date_start} and {date_end} from courses {courses}' - 'to update with None values for average_progress'.format( + 'FIGURES: Found {count} records from dates between {date_start} and {date_end} ' + 'to update with None values for average_progress, from courses:\n\n{courses}.' + '{dry_run_msg}'.format( count=num_backfills, - date_start=backfills.earliest('date_for'), - date_end=backfills.latest('date_for'), - courses=', \n'.join(backfills.values('course_id').distinct()) + date_start=backfills.earliest('date_for').date_for, + date_end=backfills.latest('date_for').date_for, + courses='\n'.join(set(backfills.values_list('course_id', flat=True))), + dry_run_msg = '\n\nDRY RUN. Not updating records.' if options['dry_run'] else '' ) ) print(logmsg) From 21c41d1e082e1a45b5af98ac0a5b45d6068cd8f9 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 1 Jun 2021 17:03:00 -0700 Subject: [PATCH 13/26] Fix BackfillCommand and populate_daily_metrics to take a site id not a Site object --- figures/management/commands/__init__.py | 9 ++++++--- figures/management/commands/backfill_daily_metrics.py | 2 +- figures/management/commands/backfill_enrollment_data.py | 8 ++++---- figures/management/commands/backfill_monthly_metrics.py | 7 +++++-- .../management/commands/repair_backfilled_progress.py | 6 ++++-- .../management/commands/run_figures_monthly_metrics.py | 1 + figures/sites.py | 7 +++++++ figures/tasks.py | 8 ++++++-- 8 files changed, 34 insertions(+), 14 deletions(-) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 3c3b5d00..cafc5af4 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -13,19 +13,22 @@ class BaseBackfillCommand(BaseCommand): '''Base class for Figures backfill management commands with common options. ''' - def get_sites(self, identifier=None): + def get_site_ids(self, identifier=None): """Quick-n-dirty function to let the caller choose the site id or domain. If no identifier is passed, return all available Sites. Let the 'get' fail if record can't be found from the identifier. + Returns Site ids for passing to Celery tasks. + Note that at present, none of the tasks handle more than one specified Site. """ if not identifier: - return get_sites() + sites = get_sites() else: try: filter_arg = dict(pk=int(identifier)) except ValueError: filter_arg = dict(domain=identifier) - return Site.objects.filter(**filter_arg) + sites = Site.objects.filter(**filter_arg) + return [site.id for site in sites] def get_date(self, date_str=None): '''Return a datetime.date from a string or NoneType. diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 67340ac2..e181d247 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -56,7 +56,7 @@ def handle(self, *args, **options): print('BEGIN: Backfill Figures daily metrics metrics for: {}'.format(dt)) kwargs = dict( - sites=self.get_sites(options['site']), + site_ids=self.get_site_ids(options['site']), date_for=dt, force_update=options['overwrite'] ) diff --git a/figures/management/commands/backfill_enrollment_data.py b/figures/management/commands/backfill_enrollment_data.py index cda098d1..aadec5c8 100644 --- a/figures/management/commands/backfill_enrollment_data.py +++ b/figures/management/commands/backfill_enrollment_data.py @@ -21,11 +21,11 @@ class Command(BaseBackfillCommand): def handle(self, *args, **options): print('BEGIN: Backfill Figures EnrollmentData') - for site in self.get_sites(options['site']): - print('Updating EnrollmentData for site "{}"'.format(site.domain)) + for site_id in self.get_site_ids(options['site']): + print('Updating EnrollmentData for site {}'.format(site_id)) if options['no_delay']: - update_enrollment_data(site_id=site.id) + update_enrollment_data(site_ids=(site_id)) else: - update_enrollment_data.delay(site_id=site.id) # pragma: no cover + update_enrollment_data.delay(site_ids=(site_id)) # pragma: no cover print('DONE: Backfill Figures EnrollmentData') diff --git a/figures/management/commands/backfill_monthly_metrics.py b/figures/management/commands/backfill_monthly_metrics.py index 5a7cfec1..48bb7772 100644 --- a/figures/management/commands/backfill_monthly_metrics.py +++ b/figures/management/commands/backfill_monthly_metrics.py @@ -7,6 +7,8 @@ from textwrap import dedent +from django.contrib.sites.models import Site + from figures.backfill import backfill_monthly_metrics_for_site from . import BaseBackfillCommand @@ -14,7 +16,7 @@ def backfill_site(site, overwrite): - print('Backfilling monthly metrics for site id="{}" domain={}'.format( + print('Backfilling monthly metrics for site id={} domain={}'.format( site.id, site.domain)) backfilled = backfill_monthly_metrics_for_site(site=site, @@ -38,7 +40,8 @@ class Command(BaseBackfillCommand): def handle(self, *args, **options): print('BEGIN: Backfill Figures Monthly Metrics') - for site in self.get_sites(options['site']): + for site_id in self.get_site_ids(options['site']): + site = Site.objects.get(id=site_id) backfill_site(site, overwrite=options['overwrite']) print('END: Backfill Figures Metrics') diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_backfilled_progress.py index 1c11ba0e..0c612cd0 100644 --- a/figures/management/commands/repair_backfilled_progress.py +++ b/figures/management/commands/repair_backfilled_progress.py @@ -12,7 +12,8 @@ from datetime import timedelta from textwrap import dedent -from django.db.models import Count, F, Max, Min +from django.contrib.sites.models import Site +from django.db.models import Count, F from figures.models import CourseDailyMetrics @@ -38,7 +39,8 @@ def add_arguments(self, parser): def handle(self, *args, **options): ''' ''' - site = self.get_sites(options['site'])[0] + site_id = self.get_site_ids(options['site'])[0] + site = Site.objects.get(id=site_id) print('FIGURES: Repairing backfilled CDM.average_progress for site {}'.format(site)) diff --git a/figures/management/commands/run_figures_monthly_metrics.py b/figures/management/commands/run_figures_monthly_metrics.py index 32844ce8..5b45b4d0 100644 --- a/figures/management/commands/run_figures_monthly_metrics.py +++ b/figures/management/commands/run_figures_monthly_metrics.py @@ -6,6 +6,7 @@ from __future__ import print_function from __future__ import absolute_import + from textwrap import dedent from django.core.management.base import BaseCommand diff --git a/figures/sites.py b/figures/sites.py index 61176077..e3ca134c 100644 --- a/figures/sites.py +++ b/figures/sites.py @@ -317,3 +317,10 @@ def get_sites(): sites = _get_all_sites() return sites + + +def get_sites_by_id(site_ids): + """ + Convenience function to get a QuerySet of Sites by id. + """ + return Site.objects.filter(id=site_ids) diff --git a/figures/tasks.py b/figures/tasks.py index 26173348..5cb537e8 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -24,7 +24,7 @@ from figures.log import log_exec_time from figures.pipeline.course_daily_metrics import CourseDailyMetricsLoader from figures.pipeline.site_daily_metrics import SiteDailyMetricsLoader -from figures.sites import get_sites, site_course_ids +from figures.sites import get_sites, get_sites_by_id, site_course_ids from figures.pipeline.mau_pipeline import collect_course_mau from figures.pipeline.helpers import DateForCannotBeFutureError from figures.pipeline.site_monthly_metrics import fill_last_month as fill_last_smm_month @@ -153,7 +153,7 @@ def update_enrollment_data(site_id, **_kwargs): @shared_task -def populate_daily_metrics(sites, date_for=None, force_update=False): +def populate_daily_metrics(site_id=None, date_for=None, force_update=False): """Runs Figures daily metrics collection This is a top level Celery task run every 24 hours to collect metrics. @@ -197,6 +197,10 @@ def populate_daily_metrics(sites, date_for=None, force_update=False): date_for = today do_update_enrollment_data = False if date_for < today else True + if site_id is not None: + sites = get_sites_by_id((site_id, )) + else: + sites = get_sites() sites_count = sites.count() # This is our task entry log message From 23c1b5df23c6dfe49551e38889cd37cb59085fd0 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 1 Jun 2021 17:23:51 -0700 Subject: [PATCH 14/26] backfilly daily date_for, site_id fixes --- figures/management/commands/backfill_daily_metrics.py | 4 ++-- figures/tasks.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index e181d247..bc041562 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -56,8 +56,8 @@ def handle(self, *args, **options): print('BEGIN: Backfill Figures daily metrics metrics for: {}'.format(dt)) kwargs = dict( - site_ids=self.get_site_ids(options['site']), - date_for=dt, + site_id=self.get_site_ids(options['site'])[0], + date_for=str(dt), force_update=options['overwrite'] ) diff --git a/figures/tasks.py b/figures/tasks.py index 5cb537e8..93a38556 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -264,7 +264,7 @@ def populate_daily_metrics(site_id=None, date_for=None, force_update=False): @shared_task -def experimental_populate_daily_metrics(date_for=None, force_update=False): +def experimental_populate_daily_metrics(site_id=None, date_for=None, force_update=False): '''Experimental task to populate daily metrics WARNING: In Ginkgo devstack, this task tends to gets stuck in the middle of From a03f77a9631398812c37821bb6990d9796288109 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 1 Jun 2021 17:34:36 -0700 Subject: [PATCH 15/26] Fix sites.get_sites_by_id --- figures/sites.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/figures/sites.py b/figures/sites.py index e3ca134c..32ef5836 100644 --- a/figures/sites.py +++ b/figures/sites.py @@ -323,4 +323,4 @@ def get_sites_by_id(site_ids): """ Convenience function to get a QuerySet of Sites by id. """ - return Site.objects.filter(id=site_ids) + return Site.objects.filter(id__in=site_ids) From 0ede59e1c6a5544e8499cd05bdfcc2e80beee59f Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Wed, 2 Jun 2021 17:27:02 -0700 Subject: [PATCH 16/26] Defend against a Site Montly Metrics average_progress of None Can happen if time period is filled entirely by backfilling --- figures/metrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/figures/metrics.py b/figures/metrics.py index bd1a7778..d380d11a 100644 --- a/figures/metrics.py +++ b/figures/metrics.py @@ -477,7 +477,10 @@ def get_course_average_progress_for_time_period(site, start_date, end_date, cour qs = CourseDailyMetrics.objects.filter(**filter_args) if qs: value = qs.aggregate(average=Avg('average_progress'))['average'] - return float(Decimal(value).quantize(Decimal('.00'))) + try: + return float(Decimal(value).quantize(Decimal('.00'))) + except TypeError: # if value is None + return 0.0 else: return 0.0 From e7e8ca6c973d45e9a3892969070a32f8af9501a6 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 8 Jun 2021 17:45:19 -0700 Subject: [PATCH 17/26] Allow average_progress calc for prior day, but no earlier --- figures/pipeline/course_daily_metrics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index c48f4df7..b08202d5 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -15,6 +15,7 @@ from decimal import Decimal import logging +from dateutil.relativedelta import relativedelta from django.db import transaction from student.roles import CourseCcxCoachRole, CourseInstructorRole, CourseStaffRole # noqa pylint: disable=import-error @@ -263,16 +264,16 @@ def extract(self, course_id, date_for, **_kwargs): data['active_learners_today'] = active_learners_today # Average progress - # Progress data cannot be reliable for backfills or for any date prior to today + # Progress data cannot be reliable for backfills or for any date prior to yesterday # without using StudentModuleHistory so we skip getting this data if running - # for a past date (i.e., not during daily update of CDMs), especially since - # it is so expensive to calculate. + # for a day earlier than previous day (i.e., not during daily update of CDMs), + # especially since it is so expensive to calculate. # Note that Avg() applied across null and decimal vals for aggregate average_progress # will correctly ignore nulls # TODO: Reconsider this if we implement either StudentModuleHistory-based queries # (if so, you will need to add any types you want to StudentModuleHistory.HISTORY_SAVING_TYPES) # TODO: Reconsider this once we switch to using Persistent Grades - if is_past_date(date_for): + if is_past_date(date_for + relativedelta(days=1)): # more than 1 day in past data['average_progress'] = None msg = ('FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date' ' date_for={date_for}, course_id="{course_id}"') From a956e42f8ba1fad3f72d6ea4ae1549c09237320b Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Tue, 8 Jun 2021 18:02:05 -0700 Subject: [PATCH 18/26] Test CourseDailyMetricsLoader average_progress for yesterday, prior --- tests/pipeline/test_course_daily_metrics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/pipeline/test_course_daily_metrics.py b/tests/pipeline/test_course_daily_metrics.py index 6e039e46..b51833ab 100644 --- a/tests/pipeline/test_course_daily_metrics.py +++ b/tests/pipeline/test_course_daily_metrics.py @@ -8,6 +8,8 @@ import mock import pytest +from dateutil.relativedelta import relativedelta + from django.core.exceptions import PermissionDenied, ValidationError from figures.compat import CourseAccessRole, CourseEnrollment @@ -335,6 +337,16 @@ def test_load_invalid_data(self, monkeypatch, average_progress): assert CourseDailyMetrics.objects.count() == 0 assert 'average_progress' in execinfo.value.message_dict + @pytest.mark.parametrize(['days_back', 'expected_val'], [(1, 1), (2, None)]) + def test_average_progress_yesterday_or_prior(self, monkeypatch, days_back, expected_val): + monkeypatch.setattr(figures.pipeline.course_daily_metrics, + 'bulk_calculate_course_progress_data', + lambda **_kwargs: dict(average_progress=1)) + course_id = self.course_enrollments[0].course_id + date_for = datetime.datetime.today().date() - relativedelta(days=days_back) + cdm, created = pipeline_cdm.CourseDailyMetricsLoader(course_id).load(date_for=date_for) + assert cdm.average_progress == expected_val + @pytest.mark.skip('Implement me!') def test_load_force_update(self): pass From a5eef97610e71bed57f5c88de11dec7d1cc56771 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Wed, 9 Jun 2021 18:09:12 -0700 Subject: [PATCH 19/26] Test BaseBackfillCommand --- .../commands/backfill_daily_metrics.py | 1 - tests/test_commands.py | 108 ++++++++++++++---- 2 files changed, 88 insertions(+), 21 deletions(-) diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index bc041562..24224955 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -20,7 +20,6 @@ class Command(BaseBackfillCommand): '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``). - Note that correctly populating cumulative user and course count for ``SiteDailyMetrics`` relies on running this sequentially forward from the first date for which StudentModule records are present. diff --git a/tests/test_commands.py b/tests/test_commands.py index 267a96f3..e8f384be 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -2,28 +2,105 @@ """ from __future__ import absolute_import + +from dateutil import parser import mock import pytest + +from django.contrib.sites.models import Site from django.core.management import call_command -from django.test import TestCase -from django.utils.six import StringIO +from figures.management.commands import BaseBackfillCommand from tests.factories import SiteFactory -from tests.helpers import OPENEDX_RELEASE, GINKGO -class PopulateFiguresMetricsTest(TestCase): +@pytest.mark.django_db +class TestBaseBackfillCommand(object): + """Exercise common backfill command base class methods.""" + + @pytest.mark.parametrize('site, expected_result', [ + ('1', [1]), + (None, [1, 2, 3]), + ('site-1.example.com', [3]), + ('4', []), + ('site-88.example.com', []) + ]) + def test_get_side_ids(self, site, expected_result): + """Test that get_site_ids will return the correct Site id if passed that id + or the corresponding Site's domain, will return all Site ids if none passed, + and will return an empty list if a non-existing Site id or domain passed + """ + # Site table will have an existing example.com Site as well as whatever we + # create w/ SiteFactory, due to data migration + example_site = Site.objects.get(domain='example.com') + SiteFactory.reset_sequence(0) + site1 = SiteFactory() # site-0.example.com + site2 = SiteFactory() # site-1.example.com + with mock.patch('figures.management.commands.get_sites') as mock_get_sites: + mock_get_sites.return_value = [example_site, site1, site2] + site_ids = BaseBackfillCommand().get_site_ids(site) + assert site_ids == expected_result + + +@pytest.mark.django_db +class TestBackfillDailyMetrics(object): + """Exercise backfill_daily_metrics command.""" + + BASE_PATH = 'figures.management.commands.backfill_daily_metrics' + PLAIN_PATH = BASE_PATH + '.populate_daily_metrics' + DELAY_PATH = PLAIN_PATH + '.delay' + EXP_PATH = BASE_PATH + '.experimental_populate_daily_metrics' - @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, - reason='Broken test. Apparent Django 1.8 incompatibility') - def test_command_output(self): - out = StringIO() - call_command('populate_figures_metrics', '--no-delay', stdout=out) + def test_backfill_daily_func(self): + """Test backfill daily regular and experimental. + """ + with mock.patch(self.PLAIN_PATH) as mock_populate: + call_command('backfill_daily_metrics', no_delay=True) + mock_populate.assert_called() + with mock.patch(self.EXP_PATH) as mock_populate_exp: + call_command('backfill_daily_metrics', experimental=True, no_delay=True) + mock_populate_exp.assert_called() - self.assertEqual('', out.getvalue()) + def test_backfill_daily_delay(self): + """Test backfill daily called without no_delay uses Celery task delay. + """ + with mock.patch(self.DELAY_PATH) as mock_populate: + call_command('backfill_daily_metrics') + mock_populate.assert_called() + def test_backfill_daily_dates(self): + """Test backfill daily called with start and end dates + """ + end = '2021-06-08' + start = '2021-01-01' + start_dt = parser.parse(start) + end_dt = parser.parse(end) + exp_days = abs((end_dt - start_dt).days) + 1 + with mock.patch(self.PLAIN_PATH) as mock_populate: + call_command('backfill_daily_metrics', date_start=start, date_end=end, no_delay=True) + assert mock_populate.call_count == exp_days -def test_mau_no_delay(transactional_db): + def test_backfill_daily_same_day(self): + """Test backfill daily called with same start and end date. + """ + start = '2021-06-08' + end = '2021-06-08' + exp_days = 1 + with mock.patch(self.PLAIN_PATH) as mock_populate: + call_command('backfill_daily_metrics', date_start=start, date_end=end, no_delay=True) + assert mock_populate.call_count == exp_days + + def test_backfill_daily_for_site(self): + """Test that proper site id gets passed to task func. Doesn't exercise get_side_ids.""" + with mock.patch('figures.management.commands.BaseBackfillCommand.get_site_ids') as mock_get_site_ids: + mock_get_site_ids.return_value = [1,] + with mock.patch(self.PLAIN_PATH) as mock_populate: + call_command('backfill_daily_metrics', no_delay=True) + assert mock_populate.called_with(site_id=1) + + +@pytest.mark.skip() +def test_mau_no_delay(self): """ We test that `populate_figures_metrics` command executes the method, `figures.tasks.populate_all_mau` in immediate mode "no delay" @@ -32,12 +109,3 @@ def test_mau_no_delay(transactional_db): with mock.patch(path) as mock_populate_all_mau: call_command('populate_figures_metrics', '--no-delay', '--mau') mock_populate_all_mau.assert_called() - - -def test_backfill(transactional_db): - """Minimal test the backfill management command - """ - path = 'figures.management.commands.backfill_figures_metrics.backfill_monthly_metrics_for_site' - with mock.patch(path) as mock_backfill: - call_command('backfill_figures_metrics') - mock_backfill.assert_called() From 0640cbf5c3b9b7329c060f9f7d8836de133fc042 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Wed, 9 Jun 2021 18:10:49 -0700 Subject: [PATCH 20/26] Remove unneeded test, command args flake8 --- figures/backfill.py | 2 +- figures/management/commands/__init__.py | 2 +- .../commands/backfill_daily_metrics.py | 20 +++++++++++++------ .../commands/repair_backfilled_progress.py | 8 +++++--- figures/pipeline/course_daily_metrics.py | 5 +++-- tests/test_commands.py | 12 ----------- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/figures/backfill.py b/figures/backfill.py index f58d74ea..cf69f8b6 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -19,7 +19,7 @@ from figures.models import EnrollmentData -def backfill_monthly_metrics_for_site(site, month_for=None, overwrite=False): +def backfill_monthly_metrics_for_site(site, overwrite=False): """Backfill specified months' historical site metrics for the specified site """ site_sm = get_student_modules_for_site(site) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index cafc5af4..8e41fe91 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -68,7 +68,7 @@ def add_arguments(self, parser): help='Overwrite metrics records if they exist for the given date' ) - def print_exc(metrics_type, date, exc_message): + def print_exc(self, metrics_type, date, exc_message): print("Could not populate {} for {}. Exception was {}".format( metrics_type, date, exc_message) ) diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 24224955..3b61850f 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -1,15 +1,16 @@ '''Management command to manually populate course and site daily metrics -see the models ``figures.models.CourseDailyMetrics`` and ``figures.models.SiteDailyMetrics`` +See the models ``figures.models.CourseDailyMetrics`` and ``figures.models.SiteDailyMetrics`` ''' from __future__ import print_function from __future__ import absolute_import -from dateutil.rrule import rrule, DAILY from textwrap import dedent +from dateutil.rrule import rrule, DAILY + from figures.tasks import ( populate_daily_metrics, experimental_populate_daily_metrics @@ -19,7 +20,7 @@ class Command(BaseBackfillCommand): - '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``). + '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``). Note that correctly populating cumulative user and course count for ``SiteDailyMetrics`` relies on running this sequentially forward from the first date for which StudentModule records are present. @@ -47,7 +48,9 @@ def handle(self, *args, **options): experimental = options['experimental'] - print('BEGIN RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) + print('BEGIN RANGE: Backfilling Figures daily metrics for dates {} to {}'.format( + date_start, date_end + )) # populate daily metrics one day at a time for date range for dt in rrule(DAILY, dtstart=date_start, until=date_end): @@ -60,7 +63,10 @@ def handle(self, *args, **options): force_update=options['overwrite'] ) - metrics_func = experimental_populate_daily_metrics if experimental else populate_daily_metrics + if experimental: + metrics_func = experimental_populate_daily_metrics + else: + metrics_func = populate_daily_metrics # try: if options['no_delay']: metrics_func(**kwargs) @@ -74,4 +80,6 @@ def handle(self, *args, **options): print('END: Backfill Figures daily metrics metrics for: {}'.format(dt)) - print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(date_start, date_end)) + print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format( + date_start, date_end + )) diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_backfilled_progress.py index 0c612cd0..53e0619b 100644 --- a/figures/management/commands/repair_backfilled_progress.py +++ b/figures/management/commands/repair_backfilled_progress.py @@ -1,5 +1,5 @@ ''' -Management command to set as None any CourseDailyMetrics.average_progress values +Management command to set as None any CourseDailyMetrics.average_progress values if produced by a backfill. We do this because progress or any other value generated by examining StudentModule @@ -63,11 +63,13 @@ def handle(self, *args, **options): date_start=backfills.earliest('date_for').date_for, date_end=backfills.latest('date_for').date_for, courses='\n'.join(set(backfills.values_list('course_id', flat=True))), - dry_run_msg = '\n\nDRY RUN. Not updating records.' if options['dry_run'] else '' + dry_run_msg='\n\nDRY RUN. Not updating records.' if options['dry_run'] else '' ) ) print(logmsg) if not options['dry_run']: - print('FIGURES: set average_progress to None for {} CourseDailyMetrics records'.format(num_backfills)) + print('FIGURES: set average_progress to None for {} CourseDailyMetrics records'.format( + num_backfills + )) backfills.update(average_progress=None) diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index b08202d5..8c51d571 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -264,14 +264,15 @@ def extract(self, course_id, date_for, **_kwargs): data['active_learners_today'] = active_learners_today # Average progress - # Progress data cannot be reliable for backfills or for any date prior to yesterday + # Progress data cannot be reliable for backfills or for any date prior to yesterday # without using StudentModuleHistory so we skip getting this data if running # for a day earlier than previous day (i.e., not during daily update of CDMs), # especially since it is so expensive to calculate. # Note that Avg() applied across null and decimal vals for aggregate average_progress # will correctly ignore nulls # TODO: Reconsider this if we implement either StudentModuleHistory-based queries - # (if so, you will need to add any types you want to StudentModuleHistory.HISTORY_SAVING_TYPES) + # (if so, you will need to add any types you want to + # StudentModuleHistory.HISTORY_SAVING_TYPES) # TODO: Reconsider this once we switch to using Persistent Grades if is_past_date(date_for + relativedelta(days=1)): # more than 1 day in past data['average_progress'] = None diff --git a/tests/test_commands.py b/tests/test_commands.py index e8f384be..4380ae48 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -97,15 +97,3 @@ def test_backfill_daily_for_site(self): with mock.patch(self.PLAIN_PATH) as mock_populate: call_command('backfill_daily_metrics', no_delay=True) assert mock_populate.called_with(site_id=1) - - -@pytest.mark.skip() -def test_mau_no_delay(self): - """ - We test that `populate_figures_metrics` command executes the method, - `figures.tasks.populate_all_mau` in immediate mode "no delay" - """ - path = 'figures.management.commands.populate_figures_metrics.populate_all_mau' - with mock.patch(path) as mock_populate_all_mau: - call_command('populate_figures_metrics', '--no-delay', '--mau') - mock_populate_all_mau.assert_called() From ff635ccea04fc83f680464473ccc7db47e447881 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Wed, 9 Jun 2021 18:34:43 -0700 Subject: [PATCH 21/26] Fix update_enrollment_data task call in backfill_enrollment_data --- figures/management/commands/backfill_enrollment_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/figures/management/commands/backfill_enrollment_data.py b/figures/management/commands/backfill_enrollment_data.py index aadec5c8..46a389da 100644 --- a/figures/management/commands/backfill_enrollment_data.py +++ b/figures/management/commands/backfill_enrollment_data.py @@ -24,8 +24,8 @@ def handle(self, *args, **options): for site_id in self.get_site_ids(options['site']): print('Updating EnrollmentData for site {}'.format(site_id)) if options['no_delay']: - update_enrollment_data(site_ids=(site_id)) + update_enrollment_data(site_id=site_id) else: - update_enrollment_data.delay(site_ids=(site_id)) # pragma: no cover + update_enrollment_data.delay(site_id=site_id) # pragma: no cover print('DONE: Backfill Figures EnrollmentData') From 724cbba2f335f904379daf1951fa7914db6a1697 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Wed, 9 Jun 2021 18:40:52 -0700 Subject: [PATCH 22/26] Remove unimplemented site_id param for experimental_pop_daily_metrics --- figures/management/commands/backfill_daily_metrics.py | 1 + figures/tasks.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index 3b61850f..ddb1180d 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -65,6 +65,7 @@ def handle(self, *args, **options): if experimental: metrics_func = experimental_populate_daily_metrics + del kwargs['site_id'] # not implemented for experimental else: metrics_func = populate_daily_metrics # try: diff --git a/figures/tasks.py b/figures/tasks.py index 93a38556..5cb537e8 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -264,7 +264,7 @@ def populate_daily_metrics(site_id=None, date_for=None, force_update=False): @shared_task -def experimental_populate_daily_metrics(site_id=None, date_for=None, force_update=False): +def experimental_populate_daily_metrics(date_for=None, force_update=False): '''Experimental task to populate daily metrics WARNING: In Ginkgo devstack, this task tends to gets stuck in the middle of From ae64f072c1d27fde3210f6e23578a5a1aec6ca40 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Fri, 11 Jun 2021 14:23:20 -0700 Subject: [PATCH 23/26] Move BaseBackfillCommand to a new figures.management.base module --- figures/management/base.py | 74 +++++++++++++++++++ figures/management/commands/__init__.py | 73 +----------------- .../commands/backfill_daily_metrics.py | 3 +- .../commands/backfill_enrollment_data.py | 3 +- .../commands/backfill_monthly_metrics.py | 3 +- .../commands/repair_backfilled_progress.py | 3 +- tests/test_commands.py | 7 +- 7 files changed, 83 insertions(+), 83 deletions(-) create mode 100644 figures/management/base.py diff --git a/figures/management/base.py b/figures/management/base.py new file mode 100644 index 00000000..34d2af4d --- /dev/null +++ b/figures/management/base.py @@ -0,0 +1,74 @@ +""" +Management command base classes for Figures. +""" +from datetime import datetime + +from django.contrib.sites.models import Site +from django.core.management.base import BaseCommand + +from figures import helpers +from figures.sites import get_sites + + +class BaseBackfillCommand(BaseCommand): + '''Base class for Figures backfill management commands with common options. + ''' + def get_site_ids(self, identifier=None): + """Quick-n-dirty function to let the caller choose the site id or domain. + If no identifier is passed, return all available Sites. + Let the 'get' fail if record can't be found from the identifier. + Returns Site ids for passing to Celery tasks. + Note that at present, none of the tasks handle more than one specified Site. + """ + if not identifier: + sites = get_sites() + else: + try: + filter_arg = dict(pk=int(identifier)) + except ValueError: + filter_arg = dict(domain=identifier) + sites = Site.objects.filter(**filter_arg) + return [site.id for site in sites] + + def get_date(self, date_str=None): + '''Return a datetime.date from a string or NoneType. + ''' + try: + return helpers.as_date(date_str) + except TypeError: + return datetime.today().date() + + def add_arguments(self, parser): + ''' + ''' + parser.add_argument( + '--site', + help='backfill a specific site. provide numeric id or domain name', + default=None + ) + # TODO: handle date start later than date end + parser.add_argument( + '--date_start', + help='date for which we start backfilling data', + ) + parser.add_argument( + '--date_end', + help='date for which we end backfilling data', + ) + parser.add_argument( + '--no-delay', + action='store_true', + default=False, + help='Disable the celery "delay" directive' + ) + parser.add_argument( + '--overwrite', + action='store_true', + default=False, + help='Overwrite metrics records if they exist for the given date' + ) + + def print_exc(self, metrics_type, date, exc_message): + print("Could not populate {} for {}. Exception was {}".format( + metrics_type, date, exc_message) + ) diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 8e41fe91..7b3ca0e5 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -1,74 +1,3 @@ """ Management commands for Figures. -""" -from datetime import datetime - -from django.contrib.sites.models import Site -from django.core.management.base import BaseCommand - -from figures import helpers -from figures.sites import get_sites - - -class BaseBackfillCommand(BaseCommand): - '''Base class for Figures backfill management commands with common options. - ''' - def get_site_ids(self, identifier=None): - """Quick-n-dirty function to let the caller choose the site id or domain. - If no identifier is passed, return all available Sites. - Let the 'get' fail if record can't be found from the identifier. - Returns Site ids for passing to Celery tasks. - Note that at present, none of the tasks handle more than one specified Site. - """ - if not identifier: - sites = get_sites() - else: - try: - filter_arg = dict(pk=int(identifier)) - except ValueError: - filter_arg = dict(domain=identifier) - sites = Site.objects.filter(**filter_arg) - return [site.id for site in sites] - - def get_date(self, date_str=None): - '''Return a datetime.date from a string or NoneType. - ''' - try: - return helpers.as_date(date_str) - except TypeError: - return datetime.today().date() - - def add_arguments(self, parser): - ''' - ''' - parser.add_argument( - '--site', - help='backfill a specific site. provide numeric id or domain name', - default=None - ) - # TODO: handle date start later than date end - parser.add_argument( - '--date_start', - help='date for which we start backfilling data', - ) - parser.add_argument( - '--date_end', - help='date for which we end backfilling data', - ) - parser.add_argument( - '--no-delay', - action='store_true', - default=False, - help='Disable the celery "delay" directive' - ) - parser.add_argument( - '--overwrite', - action='store_true', - default=False, - help='Overwrite metrics records if they exist for the given date' - ) - - def print_exc(self, metrics_type, date, exc_message): - print("Could not populate {} for {}. Exception was {}".format( - metrics_type, date, exc_message) - ) +""" \ No newline at end of file diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_daily_metrics.py index ddb1180d..01142264 100644 --- a/figures/management/commands/backfill_daily_metrics.py +++ b/figures/management/commands/backfill_daily_metrics.py @@ -11,13 +11,12 @@ from dateutil.rrule import rrule, DAILY +from figures.management.base import BaseBackfillCommand from figures.tasks import ( populate_daily_metrics, experimental_populate_daily_metrics ) -from . import BaseBackfillCommand - class Command(BaseBackfillCommand): '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``). diff --git a/figures/management/commands/backfill_enrollment_data.py b/figures/management/commands/backfill_enrollment_data.py index 46a389da..e22ad166 100644 --- a/figures/management/commands/backfill_enrollment_data.py +++ b/figures/management/commands/backfill_enrollment_data.py @@ -8,10 +8,9 @@ from textwrap import dedent +from figures.management.base import BaseBackfillCommand from figures.tasks import update_enrollment_data -from . import BaseBackfillCommand - class Command(BaseBackfillCommand): """Backfill Figures EnrollmentData model. diff --git a/figures/management/commands/backfill_monthly_metrics.py b/figures/management/commands/backfill_monthly_metrics.py index 48bb7772..5e89b8b8 100644 --- a/figures/management/commands/backfill_monthly_metrics.py +++ b/figures/management/commands/backfill_monthly_metrics.py @@ -10,8 +10,7 @@ from django.contrib.sites.models import Site from figures.backfill import backfill_monthly_metrics_for_site - -from . import BaseBackfillCommand +from figures.management.base import BaseBackfillCommand def backfill_site(site, overwrite): diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_backfilled_progress.py index 53e0619b..195c4ce0 100644 --- a/figures/management/commands/repair_backfilled_progress.py +++ b/figures/management/commands/repair_backfilled_progress.py @@ -15,10 +15,9 @@ from django.contrib.sites.models import Site from django.db.models import Count, F +from figures.management.base import BaseBackfillCommand from figures.models import CourseDailyMetrics -from . import BaseBackfillCommand - class Command(BaseBackfillCommand): '''Set all CourseDailyMetrics average_progress values to None where CDM was created diff --git a/tests/test_commands.py b/tests/test_commands.py index 4380ae48..0f18a092 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -10,7 +10,8 @@ from django.contrib.sites.models import Site from django.core.management import call_command -from figures.management.commands import BaseBackfillCommand +from figures.management.base import BaseBackfillCommand + from tests.factories import SiteFactory @@ -36,7 +37,7 @@ def test_get_side_ids(self, site, expected_result): SiteFactory.reset_sequence(0) site1 = SiteFactory() # site-0.example.com site2 = SiteFactory() # site-1.example.com - with mock.patch('figures.management.commands.get_sites') as mock_get_sites: + with mock.patch('figures.management.base.get_sites') as mock_get_sites: mock_get_sites.return_value = [example_site, site1, site2] site_ids = BaseBackfillCommand().get_site_ids(site) assert site_ids == expected_result @@ -92,7 +93,7 @@ def test_backfill_daily_same_day(self): def test_backfill_daily_for_site(self): """Test that proper site id gets passed to task func. Doesn't exercise get_side_ids.""" - with mock.patch('figures.management.commands.BaseBackfillCommand.get_site_ids') as mock_get_site_ids: + with mock.patch('figures.management.base.BaseBackfillCommand.get_site_ids') as mock_get_site_ids: mock_get_site_ids.return_value = [1,] with mock.patch(self.PLAIN_PATH) as mock_populate: call_command('backfill_daily_metrics', no_delay=True) From 4f166faa74b58a13a68cc34e13b3e791ef42433d Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Fri, 11 Jun 2021 14:37:23 -0700 Subject: [PATCH 24/26] Rename management commands to include "_figures" in all names --- figures/backfill.py | 4 ++-- ...rics.py => backfill_figures_daily_metrics.py} | 0 ...ta.py => backfill_figures_enrollment_data.py} | 0 ...cs.py => backfill_figures_monthly_metrics.py} | 0 ....py => repair_figures_backfilled_progress.py} | 0 tests/test_commands.py | 16 ++++++++-------- 6 files changed, 10 insertions(+), 10 deletions(-) rename figures/management/commands/{backfill_daily_metrics.py => backfill_figures_daily_metrics.py} (100%) rename figures/management/commands/{backfill_enrollment_data.py => backfill_figures_enrollment_data.py} (100%) rename figures/management/commands/{backfill_monthly_metrics.py => backfill_figures_monthly_metrics.py} (100%) rename figures/management/commands/{repair_backfilled_progress.py => repair_figures_backfilled_progress.py} (100%) diff --git a/figures/backfill.py b/figures/backfill.py index cf69f8b6..889f653c 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -12,8 +12,8 @@ from figures.compat import CourseNotFound from figures.sites import ( - get_course_enrollments_for_site, - get_student_modules_for_site + get_course_enrollments_for_site, + get_student_modules_for_site ) from figures.pipeline.site_monthly_metrics import fill_month from figures.models import EnrollmentData diff --git a/figures/management/commands/backfill_daily_metrics.py b/figures/management/commands/backfill_figures_daily_metrics.py similarity index 100% rename from figures/management/commands/backfill_daily_metrics.py rename to figures/management/commands/backfill_figures_daily_metrics.py diff --git a/figures/management/commands/backfill_enrollment_data.py b/figures/management/commands/backfill_figures_enrollment_data.py similarity index 100% rename from figures/management/commands/backfill_enrollment_data.py rename to figures/management/commands/backfill_figures_enrollment_data.py diff --git a/figures/management/commands/backfill_monthly_metrics.py b/figures/management/commands/backfill_figures_monthly_metrics.py similarity index 100% rename from figures/management/commands/backfill_monthly_metrics.py rename to figures/management/commands/backfill_figures_monthly_metrics.py diff --git a/figures/management/commands/repair_backfilled_progress.py b/figures/management/commands/repair_figures_backfilled_progress.py similarity index 100% rename from figures/management/commands/repair_backfilled_progress.py rename to figures/management/commands/repair_figures_backfilled_progress.py diff --git a/tests/test_commands.py b/tests/test_commands.py index 0f18a092..bafd7c8e 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -45,9 +45,9 @@ def test_get_side_ids(self, site, expected_result): @pytest.mark.django_db class TestBackfillDailyMetrics(object): - """Exercise backfill_daily_metrics command.""" + """Exercise backfill_figures_daily_metrics command.""" - BASE_PATH = 'figures.management.commands.backfill_daily_metrics' + BASE_PATH = 'figures.management.commands.backfill_figures_daily_metrics' PLAIN_PATH = BASE_PATH + '.populate_daily_metrics' DELAY_PATH = PLAIN_PATH + '.delay' EXP_PATH = BASE_PATH + '.experimental_populate_daily_metrics' @@ -56,17 +56,17 @@ def test_backfill_daily_func(self): """Test backfill daily regular and experimental. """ with mock.patch(self.PLAIN_PATH) as mock_populate: - call_command('backfill_daily_metrics', no_delay=True) + call_command('backfill_figures_daily_metrics', no_delay=True) mock_populate.assert_called() with mock.patch(self.EXP_PATH) as mock_populate_exp: - call_command('backfill_daily_metrics', experimental=True, no_delay=True) + call_command('backfill_figures_daily_metrics', experimental=True, no_delay=True) mock_populate_exp.assert_called() def test_backfill_daily_delay(self): """Test backfill daily called without no_delay uses Celery task delay. """ with mock.patch(self.DELAY_PATH) as mock_populate: - call_command('backfill_daily_metrics') + call_command('backfill_figures_daily_metrics') mock_populate.assert_called() def test_backfill_daily_dates(self): @@ -78,7 +78,7 @@ def test_backfill_daily_dates(self): end_dt = parser.parse(end) exp_days = abs((end_dt - start_dt).days) + 1 with mock.patch(self.PLAIN_PATH) as mock_populate: - call_command('backfill_daily_metrics', date_start=start, date_end=end, no_delay=True) + call_command('backfill_figures_daily_metrics', date_start=start, date_end=end, no_delay=True) assert mock_populate.call_count == exp_days def test_backfill_daily_same_day(self): @@ -88,7 +88,7 @@ def test_backfill_daily_same_day(self): end = '2021-06-08' exp_days = 1 with mock.patch(self.PLAIN_PATH) as mock_populate: - call_command('backfill_daily_metrics', date_start=start, date_end=end, no_delay=True) + call_command('backfill_figures_daily_metrics', date_start=start, date_end=end, no_delay=True) assert mock_populate.call_count == exp_days def test_backfill_daily_for_site(self): @@ -96,5 +96,5 @@ def test_backfill_daily_for_site(self): with mock.patch('figures.management.base.BaseBackfillCommand.get_site_ids') as mock_get_site_ids: mock_get_site_ids.return_value = [1,] with mock.patch(self.PLAIN_PATH) as mock_populate: - call_command('backfill_daily_metrics', no_delay=True) + call_command('backfill_figures_daily_metrics', no_delay=True) assert mock_populate.called_with(site_id=1) From 459b181d51c2c7b2b669998cd7d1786e3c2ce4d4 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 14 Jun 2021 13:47:05 -0700 Subject: [PATCH 25/26] pend deprecation populate_figures_metrics, replace functionality pass --mau usage to new run_figures_mau_metrics pass other usage to backfill_figures_daily_metrics (with start and end date of single date passed ) --- .../commands/populate_figures_metrics.py | 80 +++++++++++++++++++ .../commands/run_figures_mau_metrics.py | 36 +++++++++ 2 files changed, 116 insertions(+) create mode 100644 figures/management/commands/populate_figures_metrics.py create mode 100644 figures/management/commands/run_figures_mau_metrics.py diff --git a/figures/management/commands/populate_figures_metrics.py b/figures/management/commands/populate_figures_metrics.py new file mode 100644 index 00000000..8f25ed02 --- /dev/null +++ b/figures/management/commands/populate_figures_metrics.py @@ -0,0 +1,80 @@ +""" +Deprecated: +Please call instead backfill_figures_daily_metrics. + +Management command to manually populate course metrics + +see the model ``edx_figures.models.CourseDailyMetrics`` +""" + +from __future__ import print_function + +from __future__ import absolute_import +from textwrap import dedent +import warnings + +from django.core.management import call_command +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + '''Populate Figures metrics models + ''' + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + ''' + ''' + + parser.add_argument('--date', + help='date for which we are retrieving data in yyyy-mm-dd format') + parser.add_argument('--no-delay', + action='store_true', + default=False, + help='Disable the celery "delay" directive') + parser.add_argument('--force-update', + action='store_true', + default=False, + help='Overwrite metrics records if they exist for the given date') + parser.add_argument('--experimental', + action='store_true', + default=False, + help=('Run with Celery workflows (Warning: This is still under' + + ' development and likely to get stuck/hung jobs')) + parser.add_argument('--mau', + action='store_true', + default=False, + help='Run just the MAU pipeline') + + def handle(self, *args, **options): + ''' + Pending deprecation. Passes handling off to new commands. + + The 'mau' conditional check in this method is a quick hack to run the + MAU task from this command. What we probably want is a 'figures_cli' + command with subcommands. + ''' + warnings.warn( + "populate_figures_metrics is pending deprecation and will be removed in " + "Figures 1.0. Please use backfill_figures_daily_metrics, instead; or, " + "if you were calling with --mau option, use populate_figures_mau_metrics.", + PendingDeprecationWarning + ) + print('populating Figures metrics...') + + if options['mau']: + call_command('run_figures_mau_metrics', no_delay=options['no_delay']) + else: + call_command( + 'backfill_figures_daily_metrics', + no_delay=options['no_delay'], + date_start=options['date'], + date_end=options['date'], + overwrite=options['force_update'], + experimental=options['experimental'] + ) + + # TODO: improve this message to say 'today' when options['date'] is None + print('Management command populate_figures_metrics complete. date_for: {}'.format( + options['date'])) + print('Done.') diff --git a/figures/management/commands/run_figures_mau_metrics.py b/figures/management/commands/run_figures_mau_metrics.py new file mode 100644 index 00000000..f7d7d5a3 --- /dev/null +++ b/figures/management/commands/run_figures_mau_metrics.py @@ -0,0 +1,36 @@ +"""Figures management command to run course MAU metrics for all courses, all Sites. +""" + +from __future__ import print_function + +from __future__ import absolute_import + +from textwrap import dedent + +from django.core.management.base import BaseCommand + +from figures.tasks import ( + populate_all_mau +) + + +class Command(BaseCommand): + """Task runner to kick off Figures celery tasks + """ + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument('--no-delay', + action='store_true', + default=False, + help='Disable the celery "delay" directive') + + def handle(self, *args, **options): + print('Starting Figures MAU metrics for all Sites...') + + if options['no_delay']: + populate_all_mau() + else: + populate_all_mau.delay() + + print('Done.') From 7ae08e331cce19de23fd79a08253e6013e9bfab6 Mon Sep 17 00:00:00 2001 From: Bryan Wilson Date: Mon, 14 Jun 2021 13:48:06 -0700 Subject: [PATCH 26/26] Add back backfill_figures_metrics, pending deprecation Calls backfill_daily_metrics and backfill_monthly_metrics, w/options --- figures/management/__init__.py | 2 + figures/management/base.py | 1 + figures/management/commands/__init__.py | 3 +- .../commands/backfill_figures_metrics.py | 57 ++++++++++++++++ tests/test_commands.py | 67 +++++++++++++++++++ 5 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 figures/management/commands/backfill_figures_metrics.py diff --git a/figures/management/__init__.py b/figures/management/__init__.py index e69de29b..c0f6f287 100644 --- a/figures/management/__init__.py +++ b/figures/management/__init__.py @@ -0,0 +1,2 @@ +""" +""" diff --git a/figures/management/base.py b/figures/management/base.py index 34d2af4d..418e9839 100644 --- a/figures/management/base.py +++ b/figures/management/base.py @@ -41,6 +41,7 @@ def get_date(self, date_str=None): def add_arguments(self, parser): ''' ''' + # TODO: allow passing the queue to use. Warn if no_delay specified. parser.add_argument( '--site', help='backfill a specific site. provide numeric id or domain name', diff --git a/figures/management/commands/__init__.py b/figures/management/commands/__init__.py index 7b3ca0e5..3b3b17a0 100644 --- a/figures/management/commands/__init__.py +++ b/figures/management/commands/__init__.py @@ -1,3 +1,2 @@ +"""Django management commands for Figures. """ -Management commands for Figures. -""" \ No newline at end of file diff --git a/figures/management/commands/backfill_figures_metrics.py b/figures/management/commands/backfill_figures_metrics.py new file mode 100644 index 00000000..484a5b63 --- /dev/null +++ b/figures/management/commands/backfill_figures_metrics.py @@ -0,0 +1,57 @@ +"""Deprecated: +Please call instead one of: +backfill_figures_daily_metrics, backfill_figures_monthly_metrics, or +backfill_figures_enrollment_data + +Backfills Figures historical metrics + +""" + +from __future__ import print_function + +from __future__ import absolute_import +from textwrap import dedent +import warnings + +from django.core.management import call_command +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + """Pending Deprecation: Populate Figures metrics models + """ + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument('--overwrite', + action='store_true', + default=False, + help='overwrite existing data in SiteMonthlyMetrics') + parser.add_argument('--site', + help='backfill a specific site. provide id or domain name') + + def handle(self, *args, **options): + ''' + Pending deprecation. Passes handling off to new commands. + ''' + warnings.warn( + "backfill_figures_metrics is pending deprecation and will be removed in " + "Figures 1.0. Please use one of backfill_figures_daily_metrics, " + "backfill_figures_monthly_metrics, or backfill_figures_enrollment_data, " + "instead.", + PendingDeprecationWarning + ) + print('BEGIN: Backfill Figures Metrics') + + call_command( + 'backfill_figures_monthly_metrics', + overwrite=options['overwrite'], + site=options['site'] + ) + call_command( + 'backfill_figures_daily_metrics', + overwrite=options['overwrite'], + site=options['site'] + ) + + print('DONE: Backfill Figures Metrics') diff --git a/tests/test_commands.py b/tests/test_commands.py index bafd7c8e..3387e319 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -98,3 +98,70 @@ def test_backfill_daily_for_site(self): with mock.patch(self.PLAIN_PATH) as mock_populate: call_command('backfill_figures_daily_metrics', no_delay=True) assert mock_populate.called_with(site_id=1) + + +class TestPopulateFiguresMetricsCommand(object): + """Test that command gives a pending deprecation warning and that it calls the correct + substitute management commands based on passed options. + """ + + def test_pending_deprecation(self): + mock_call_path = 'figures.management.commands.populate_figures_metrics.call_command' + with mock.patch(mock_call_path): + with pytest.warns(PendingDeprecationWarning): + call_command('populate_figures_metrics') + + @pytest.mark.parametrize('options, subst_command, subst_call_options', [ + ( + {'mau': True, 'no_delay': None, 'date': '2021-06-14', 'experimental': None}, + 'run_figures_mau_metrics', + {'no_delay': None} + ), + ( + { + 'mau': False, 'no_delay': True, 'date': '2021-06-14', + 'experimental': True, 'force_update': True + }, + 'backfill_figures_daily_metrics', + { + 'no_delay': True, 'experimental': True, 'overwrite': True, + 'date_start': '2021-06-14', 'date_end': '2021-06-14' + } + ) + ]) + def test_correct_subtitute_commands_called(self, options, subst_command, subst_call_options): + old_pop_cmd = 'figures.management.commands.populate_figures_metrics.call_command' + with mock.patch(old_pop_cmd) as mock_call_cmd: + call_command('populate_figures_metrics', **options) # this isn't the patched version of call_command + mock_call_cmd.assert_called_with(subst_command, **subst_call_options) + + +class TestBackfillFiguresMetricsCommand(object): + """Test that command gives a pending deprecation warning and that it calls the correct + substitute management commands based on passed options. + """ + + def test_pending_deprecation(self): + mock_call_path = 'figures.management.commands.backfill_figures_metrics.call_command' + with mock.patch(mock_call_path): + with pytest.warns(PendingDeprecationWarning): + call_command('backfill_figures_metrics') + + @pytest.mark.parametrize('options, subst_call_options', [ + ( + {'site': 1, 'overwrite': None}, + {'site': 1, 'overwrite': None} + ), + ( + {'overwrite': True}, + {'site': None, 'overwrite': True} + ), + ]) + def test_correct_subtitute_commands_called(self, options, subst_call_options): + old_pop_cmd = 'figures.management.commands.backfill_figures_metrics.call_command' + with mock.patch(old_pop_cmd) as mock_call_cmd: + call_command('backfill_figures_metrics', **options) # this isn't the patched version of call_command + dailycmd = 'backfill_figures_daily_metrics' + monthlycmd = 'backfill_figures_monthly_metrics' + mock_call_cmd.assert_any_call(dailycmd, **subst_call_options) + mock_call_cmd.assert_any_call(monthlycmd, **subst_call_options)