Skip to content

Commit

Permalink
Merge pull request #352 from appsembler/bryan/flexible-backfill-commands
Browse files Browse the repository at this point in the history
Refactor and fix Backfill commands
  • Loading branch information
bryanlandia authored Jun 16, 2021
2 parents e283d8c + 7ae08e3 commit 4b40155
Show file tree
Hide file tree
Showing 20 changed files with 631 additions and 180 deletions.
6 changes: 3 additions & 3 deletions figures/backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

from figures.compat import CourseNotFound
from figures.sites import (
get_course_enrollments_for_site,
get_student_modules_for_site
get_course_enrollments_for_site,
get_student_modules_for_site
)
from figures.pipeline.site_monthly_metrics import fill_month
from figures.models import EnrollmentData


def backfill_monthly_metrics_for_site(site, overwrite=False):
"""Backfill all historical site metrics for the specified site
"""Backfill specified months' historical site metrics for the specified site
"""
site_sm = get_student_modules_for_site(site)
if not site_sm:
Expand Down
4 changes: 4 additions & 0 deletions figures/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ def days_in_month(month_for):
return num_days_in_month


def is_past_date(val):
    """Tell whether ``val`` falls strictly before today's date.

    ``val`` is converted with ``as_date`` first, so it may be anything that
    helper accepts. Today's own date is not considered past.
    """
    candidate = as_date(val)
    return candidate < datetime.date.today()


# TODO: Consider changing name to 'months_back_iterator' or similar
def previous_months_iterator(month_for, months_back):
"""Iterator returns a year,month tuple for n months including the month_for.
Expand Down
2 changes: 2 additions & 0 deletions figures/management/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"""
"""
75 changes: 75 additions & 0 deletions figures/management/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
Management command base classes for Figures.
"""
from datetime import datetime

from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand

from figures import helpers
from figures.sites import get_sites


class BaseBackfillCommand(BaseCommand):
    """Shared options and helpers for the Figures backfill management commands."""

    def get_site_ids(self, identifier=None):
        """Return the ids of the Sites selected by ``identifier``.

        With a falsy ``identifier`` the ids of every site returned by
        ``get_sites()`` are used. Otherwise the identifier is treated as a
        primary key when it converts to ``int`` and as a domain name when it
        does not. The lookup uses ``filter``, so an identifier that matches
        no Site yields an empty list rather than raising.

        The ids (not Site objects) are returned for passing to Celery tasks.
        Note from the original author: at present, none of the tasks handle
        more than one specified Site.
        """
        if identifier:
            try:
                lookup = {'pk': int(identifier)}
            except ValueError:
                # Not numeric, so match on the domain name instead
                lookup = {'domain': identifier}
            matched = Site.objects.filter(**lookup)
        else:
            matched = get_sites()
        return [site.id for site in matched]

    def get_date(self, date_str=None):
        """Convert ``date_str`` with ``helpers.as_date``.

        When ``as_date`` raises ``TypeError`` (presumably the case for
        ``None``, i.e. the option was not given) today's date is returned.
        """
        try:
            parsed = helpers.as_date(date_str)
        except TypeError:
            parsed = datetime.today().date()
        return parsed

    def add_arguments(self, parser):
        """Register the options common to every backfill command."""
        # TODO: allow passing the queue to use. Warn if no_delay specified.
        parser.add_argument(
            '--site',
            default=None,
            help='backfill a specific site. provide numeric id or domain name',
        )

        # TODO: handle date start later than date end
        parser.add_argument('--date_start',
                            help='date for which we start backfilling data')
        parser.add_argument('--date_end',
                            help='date for which we end backfilling data')

        # Boolean switches; both are off unless given on the command line
        parser.add_argument('--no-delay',
                            action='store_true',
                            default=False,
                            help='Disable the celery "delay" directive')
        parser.add_argument('--overwrite',
                            action='store_true',
                            default=False,
                            help='Overwrite metrics records if they exist for the given date')

    def print_exc(self, metrics_type, date, exc_message):
        """Print a one-line report that populating metrics for ``date`` failed."""
        template = "Could not populate {} for {}. Exception was {}"
        print(template.format(metrics_type, date, exc_message))
2 changes: 2 additions & 0 deletions figures/management/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"""Django management commands for Figures.
"""
85 changes: 85 additions & 0 deletions figures/management/commands/backfill_figures_daily_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'''Management command to manually populate course and site daily metrics
See the models ``figures.models.CourseDailyMetrics`` and ``figures.models.SiteDailyMetrics``
'''

from __future__ import print_function

from __future__ import absolute_import

from textwrap import dedent

from dateutil.rrule import rrule, DAILY

from figures.management.base import BaseBackfillCommand
from figures.tasks import (
populate_daily_metrics,
experimental_populate_daily_metrics
)


class Command(BaseBackfillCommand):
    '''Populate Figures daily metrics models (``CourseDailyMetrics`` and ``SiteDailyMetrics``).
    Note that correctly populating cumulative user and course count for ``SiteDailyMetrics``
    relies on running this sequentially forward from the first date for which StudentModule records
    are present.
    '''

    # The class docstring above doubles as the command's help text, so its
    # wording is user-facing output.
    help = dedent(__doc__).strip()

    def add_arguments(self, parser):
        """Register ``--experimental``, then the shared backfill options."""
        parser.add_argument(
            '--experimental',
            action='store_true',
            default=False,
            help=('Run with Celery workflows (Warning: This is still under'
                  ' development and likely to get stuck/hung jobs')
        )
        super(Command, self).add_arguments(parser)

    def handle(self, *args, **options):
        """Populate daily metrics one day at a time over the requested range.

        The range runs from ``--date_start`` to ``--date_end`` inclusive,
        each resolved through ``get_date``. Only the first id returned by
        ``get_site_ids`` is used, and it is passed only to the
        non-experimental task (the experimental task takes no ``site_id``).

        Raises ``IndexError`` when ``get_site_ids`` returns no ids.

        Note the '# pragma: no cover' line below. This is because we are not
        yet mocking celery for test coverage.
        """
        date_start = self.get_date(options['date_start'])
        date_end = self.get_date(options['date_end'])
        experimental = options['experimental']

        print('BEGIN RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(
            date_start, date_end
        ))

        # Neither the site nor the task depends on the day being processed,
        # so resolve both once instead of querying Sites on every iteration.
        site_id = self.get_site_ids(options['site'])[0]
        if experimental:
            metrics_func = experimental_populate_daily_metrics
        else:
            metrics_func = populate_daily_metrics

        # populate daily metrics one day at a time for date range
        for dt in rrule(DAILY, dtstart=date_start, until=date_end):

            print('BEGIN: Backfill Figures daily metrics metrics for: {}'.format(dt))

            kwargs = dict(
                date_for=str(dt),
                force_update=options['overwrite']
            )
            if not experimental:
                # site_id is not implemented for the experimental task
                kwargs['site_id'] = site_id

            if options['no_delay']:
                metrics_func(**kwargs)
            else:
                metrics_func.delay(**kwargs)  # pragma: no cover

            print('END: Backfill Figures daily metrics metrics for: {}'.format(dt))

        print('END RANGE: Backfilling Figures daily metrics for dates {} to {}'.format(
            date_start, date_end
        ))
30 changes: 30 additions & 0 deletions figures/management/commands/backfill_figures_enrollment_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""This Django management command updates Figures EnrollmentData records
Running this triggers figures.tasks.update_enrollment_data for every site.
When the '--site' option is used, only that site is updated.
"""
from __future__ import print_function
from __future__ import absolute_import

from textwrap import dedent

from figures.management.base import BaseBackfillCommand
from figures.tasks import update_enrollment_data


class Command(BaseBackfillCommand):
    """Backfill Figures EnrollmentData model.
    """
    # The class docstring above is reused verbatim as the command help text.
    help = dedent(__doc__).strip()

    def handle(self, *args, **options):
        """Run ``update_enrollment_data`` once per selected site.

        Sites come from ``get_site_ids`` using the ``--site`` option. With
        ``--no-delay`` the task function is called directly; otherwise it is
        dispatched through its ``delay`` attribute.
        """
        print('BEGIN: Backfill Figures EnrollmentData')

        selected_ids = self.get_site_ids(options['site'])
        for site_id in selected_ids:
            print('Updating EnrollmentData for site {}'.format(site_id))
            if not options['no_delay']:
                update_enrollment_data.delay(site_id=site_id)  # pragma: no cover
            else:
                update_enrollment_data(site_id=site_id)

        print('DONE: Backfill Figures EnrollmentData')
72 changes: 29 additions & 43 deletions figures/management/commands/backfill_figures_metrics.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,24 @@
"""Backfills Figures historical metrics
"""Deprecated:
Please call instead one of:
backfill_figures_daily_metrics, backfill_figures_monthly_metrics, or
backfill_figures_enrollment_data
Backfills Figures historical metrics
"""

from __future__ import print_function

from __future__ import absolute_import
from textwrap import dedent
import warnings

from django.contrib.sites.models import Site
from django.core.management import call_command
from django.core.management.base import BaseCommand

from figures.backfill import backfill_monthly_metrics_for_site
from figures.sites import get_sites


def get_site(identifier):
    """Fetch a single Site by numeric id or by domain name.

    The identifier is looked up as a primary key when it converts to ``int``
    and as a domain otherwise. Any exception raised by ``Site.objects.get``
    when the lookup fails is left to propagate to the caller.
    """
    try:
        site_pk = int(identifier)
    except ValueError:
        # Not numeric, so treat the identifier as a domain name
        return Site.objects.get(domain=identifier)
    return Site.objects.get(pk=site_pk)


def backfill_site(site, overwrite):
    """Backfill monthly metrics for ``site`` and print what was done.

    Delegates to ``backfill_monthly_metrics_for_site``. Each returned record
    is read through its ``'obj'`` key and reported on one line; a falsy
    result is reported as the site having no student modules.
    """
    print('Backfilling monthly metrics for site id="{}" domain={}'.format(
        site.id, site.domain))

    results = backfill_monthly_metrics_for_site(site=site, overwrite=overwrite)
    if not results:
        print('No student modules for site "{}"'.format(site.domain))
        return

    for entry in results:
        metrics = entry['obj']
        print('Backfilled site "{}" for {} with active user count {}'.format(
            metrics.site.domain, metrics.month_for, metrics.active_user_count))


class Command(BaseCommand):
"""Populate Figures metrics models
Improvements
"""Pending Deprecation: Populate Figures metrics models
"""
help = dedent(__doc__).strip()

Expand All @@ -59,13 +31,27 @@ def add_arguments(self, parser):
help='backfill a specific site. provide id or domain name')

def handle(self, *args, **options):
'''
Pending deprecation. Passes handling off to new commands.
'''
warnings.warn(
"backfill_figures_metrics is pending deprecation and will be removed in "
"Figures 1.0. Please use one of backfill_figures_daily_metrics, "
"backfill_figures_monthly_metrics, or backfill_figures_enrollment_data, "
"instead.",
PendingDeprecationWarning
)
print('BEGIN: Backfill Figures Metrics')

if options['site']:
sites = [get_site(options['site'])]
else:
sites = get_sites()
for site in sites:
backfill_site(site, overwrite=options['overwrite'])
call_command(
'backfill_figures_monthly_metrics',
overwrite=options['overwrite'],
site=options['site']
)
call_command(
'backfill_figures_daily_metrics',
overwrite=options['overwrite'],
site=options['site']
)

print('DONE: Backfill Figures Metrics')
46 changes: 46 additions & 0 deletions figures/management/commands/backfill_figures_monthly_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Backfills Figures historical metrics
"""

from __future__ import print_function
from __future__ import absolute_import

from textwrap import dedent

from django.contrib.sites.models import Site

from figures.backfill import backfill_monthly_metrics_for_site
from figures.management.base import BaseBackfillCommand


def backfill_site(site, overwrite):
    """Backfill monthly metrics for ``site`` and print a per-month summary.

    Delegates to ``backfill_monthly_metrics_for_site``. Each returned record
    is read through its ``'obj'`` key and reported on one line; a falsy
    result is reported as the site having no student modules.
    """
    print('Backfilling monthly metrics for site id={} domain={}'.format(
        site.id, site.domain))

    results = backfill_monthly_metrics_for_site(site=site, overwrite=overwrite)
    if not results:
        print('No student modules for site "{}"'.format(site.domain))
        return

    for entry in results:
        metrics = entry['obj']
        print('Backfilled site "{}" for {} with active user count {}'.format(
            metrics.site.domain, metrics.month_for, metrics.active_user_count))


class Command(BaseBackfillCommand):
    """Backfill Figures monthly metrics models.
    """
    # The class docstring above is reused verbatim as the command help text.
    help = dedent(__doc__).strip()

    def handle(self, *args, **options):
        """Backfill monthly metrics for each site selected by ``--site``.

        Every id from ``get_site_ids`` is loaded as a ``Site`` and handed to
        ``backfill_site`` together with the ``--overwrite`` flag.
        """
        print('BEGIN: Backfill Figures Monthly Metrics')

        selected_ids = self.get_site_ids(options['site'])
        for site_id in selected_ids:
            backfill_site(Site.objects.get(id=site_id),
                          overwrite=options['overwrite'])

        print('END: Backfill Figures Metrics')
Loading

0 comments on commit 4b40155

Please sign in to comment.