Merge pull request #202 from bhilbert4/add-cron-status

Add cron job monitor table to web app
spacetelescope · Dec 17, 2018 · ef0528b · ef0528b
2 parents efc6476 + 29bb4c6
commit ef0528b
Show file tree

Hide file tree

Showing 9 changed files with 390 additions and 33 deletions.
diff --git a/docs/source/jwql_monitors.rst b/docs/source/jwql_monitors.rst
@@ -2,6 +2,12 @@
 jwql_monitors
 *************
 
+monitor_cron_jobs.py
+--------------------
+.. automodule:: jwql.jwql_monitors.monitor_cron_jobs
+    :members:
+    :undoc-members:
+
 monitor_filesystem.py
 ---------------------
 .. automodule:: jwql.jwql_monitors.monitor_filesystem

diff --git a/environment.yml b/environment.yml
@@ -1,15 +1,15 @@
 name: jwql
 channels:
-- defaults
 - http://ssb.stsci.edu/astroconda-dev
+- defaults
 dependencies:
-- astropy=4.0
+- astropy
 - astroquery=0.3.8
 - bokeh=1.0.1
 - django=2.1.2
 - ipython=6.4.0
 - jinja2=2.10
-- jwst=0.7.8rc9
+- jwst
 - matplotlib=2.1.1
 - numpy=1.14.0
 - numpydoc=0.8.0

diff --git a/jwql/jwql_monitors/monitor_cron_jobs.py b/jwql/jwql_monitors/monitor_cron_jobs.py
@@ -0,0 +1,326 @@
+#! /usr/bin/env python
+
+"""This module monitors the status of the ``jwql`` monitors via their
+log files. Basic results (e.g. ``success``, ``failure``) are collected
+and placed in a ``bokeh`` table for display on the web app.
+
+Authors
+-------
+
+    - Bryan Hilbert
+
+Use
+---
+
+    This module can be executed as such:
+
+    ::
+
+        from jwql.jwql_monitors import monitor_cron_jobs
+        monitor_cron_jobs.status()
+
+Dependencies
+------------
+
+    The user must have a configuration file named ``config.json``
+    placed in the ``utils`` directory.
+"""
+
+from datetime import datetime
+import logging
+import numpy as np
+import os
+import time
+
+from bokeh.io import save, output_file
+from bokeh.models import ColumnDataSource
+from bokeh.models.widgets import DataTable, DateFormatter, HTMLTemplateFormatter, TableColumn
+
+from jwql.utils.logging_functions import configure_logging, log_info, log_fail
+from jwql.utils.permissions import set_permissions
+from jwql.utils.utils import get_config
+
+
+def create_table(status_dict):
+    """Create interactive ``bokeh`` table containing the logfile status
+    results.
+
+    Parameters
+    ----------
+    status_dict : dict
+        Nested dictionary with status results from all logfiles
+    """
+    # Rearrange the nested dictionary into a non-nested dict for the table
+    filenames = []
+    dates = []
+    missings = []
+    results = []
+    for key in status_dict:
+        filenames.append(status_dict[key]['logname'])
+        dates.append(datetime.fromtimestamp(status_dict[key]['latest_time']))
+        missings.append(str(status_dict[key]['missing_file']))
+        results.append(status_dict[key]['status'])
+
+    # div to color the boxes in the status column
+    success_template = """
+    <div style="background:<%=
+        (function colorfromstr(){
+            if(value == "success"){
+                return("green")}
+            else{return("red")}
+            }()) %>;
+        color: white">
+    <%= value %></div>
+    """
+
+    # div to color the boxes in the column for possibly late logfiles
+    missing_template = """
+    <div style="background:<%=
+        (function colorfrombool(){
+            if(value == "True"){
+                return("orange")}
+            else{return("green")}
+            }()) %>;
+        color: white">
+    <%= value %></div>
+    """
+    success_formatter = HTMLTemplateFormatter(template=success_template)
+    missing_formatter = HTMLTemplateFormatter(template=missing_template)
+
+    data = dict(name=list(status_dict.keys()), filename=filenames, date=dates, missing=missings,
+                result=results)
+    source = ColumnDataSource(data)
+
+    datefmt = DateFormatter(format="RFC-2822")
+    columns = [
+        TableColumn(field="name", title="Monitor Name", width=200),
+        TableColumn(field="filename", title="Most Recent File", width=350),
+        TableColumn(field="date", title="Most Recent Time", width=200, formatter=datefmt),
+        TableColumn(field="missing", title="Possible Missing File", width=200, formatter=missing_formatter),
+        TableColumn(field="result", title="Status", width=100, formatter=success_formatter),
+    ]
+    data_table = DataTable(source=source, columns=columns, width=800, height=280, index_position=None)
+
+    # Get output directory for saving the table files
+    output_dir = get_config()['outputs']
+    output_filename = 'cron_status_table'
+
+    # Save full html
+    html_outfile = os.path.join(output_dir, 'monitor_cron_jobs', '{}.html'.format(output_filename))
+    output_file(html_outfile)
+    save(data_table)
+    try:
+        set_permissions(html_outfile)
+    except PermissionError:
+        logging.warning('Unable to set permissions for {}'.format(html_outfile))
+    logging.info('Saved Bokeh full HTML file: {}'.format(html_outfile))
+
+
+def find_latest(logfiles):
+    """Given a list of log files in a directory, identify the most
+    recent. The way that ``jwql.utils.logging_functions.make_log_file``
+    is set up, log files for all monitors are guaranteed to be the name
+    of the monitor followed by the datetime that they were run, so we
+    should be able to simply sort the filenames and the last will be the
+    most recent.
+
+    Parameters
+    ----------
+    logfiles : list
+        List of logfiles in the directory
+
+    Returns
+    -------
+    latest : str
+        Filename of the most recent file
+
+    latest_time : float
+        Time associated with the most recent log file
+    """
+    latest = sorted(logfiles)[-1]
+    latest_time = os.path.getctime(latest)
+    return (latest, latest_time)
+
+
+def get_cadence(filenames):
+    """Calculate the cadence of the log files in a given directory.
+    Use timestamps
+
+    Parameters
+    ---------
+    filenames : list
+        List of log files to examine
+
+    Returns
+    -------
+    mean_delta : float
+        Mean time in seconds between the appearance of consecutive log
+        files
+
+    stdev_delta : float
+        Standard deviation in seconds between the appearance of
+        consecutive log files
+    """
+    minimum_log_num = 3  # Set to a low value for now since we don't have many logfiles
+    times = [os.path.getctime(filename) for filename in filenames]
+    if len(times) >= minimum_log_num:
+        sorted_times = np.array(sorted(times))
+        delta_times = sorted_times[1:] - sorted_times[0:-1]
+        mean_delta = np.mean(delta_times)
+        stdev_delta = np.std(delta_times)
+    else:
+        # If there are < minimum_log_num logfiles, then let's assume we can't
+        # get a reliable measure of cadence. Fall back to a value of
+        # 1 year between files, to avoid accidentally flagging this monitor
+        # as running late in the subsequent check
+        mean_delta = 31556736.0  # sec per year
+        stdev_delta = 31556736.0  # sec per year
+    return mean_delta, stdev_delta
+
+
+def missing_file_check(avg_time_between, uncertainty, latest_file):
+    """Given the name of the most recent log file, along with the
+    historical average time between files and the stdev of the time
+    between files, determine whether we expect a more recent log file
+    than the file given. This could hint at a problem with the cron job
+    used to create the log files.
+
+    Parameters
+    ----------
+    avg_time_between : float
+        Average number of seconds between log files
+
+    uncertainty : float
+        Standard deviation of the number of seconds between log files
+
+    latest_file : str
+        Name of the most recent log file
+
+    Returns
+    -------
+    late : bool
+        True = We expect a more recent file than that given
+        False =  It is reasonable that the file given is the most
+        recent
+    """
+    latest_time = os.path.getctime(latest_file)
+    now = time.time()
+    time_since_latest = now - latest_time
+    if time_since_latest > (avg_time_between + 3 * uncertainty):
+        late = True
+    else:
+        late = False
+    return late
+
+
+@log_fail
+@log_info
+def status(production_mode=True):
+    """Main function: determine the status of the instrument montiors
+    by examining log files.
+
+    Parameters
+    ----------
+    production_mode : bool
+        If ``True``, look in the main log directory. If ``False``, look
+        in the ``dev`` log file directory.
+
+    Returns
+    -------
+    logfile_status : dict
+        Nested dictionary containing the status for all monitors. Top
+        level keys include all monitors. Within a given monitor, the
+        value is a dictionary containing 'missing_file' and 'status'
+        keys. 'missing_file' is a boolean describing whether or not
+        there is a suspected missing log file based on the timestamps
+        of the existing files. 'status' is a string that is either
+        'success' or 'failure'.
+    """
+    # Begin logging
+    logging.info("Beginning cron job status monitor")
+
+    # Get main logfile path
+    log_path = get_config()['log_dir']
+
+    # If we are in development mode, the log files are in a slightly
+    # different location than in production mode
+    if not production_mode:
+        log_path = os.path.join(log_path, 'dev')
+
+    # Set up a dictionary to keep track of results
+    logfile_status = {}
+
+    # Get a list of the directories under the main logging directory.
+    generator = os.walk(log_path, topdown=True)
+
+    # Loop over monitors
+    for subdir, subsubdir, filenames in generator:
+        # When running in production mode, skip the 'dev' subdirectory,
+        # as it contains the development version of the monitor logs
+        if production_mode:
+            subsubdir[:] = [dirname for dirname in subsubdir if dirname != 'dev']
+
+        if len(filenames) > 0:
+            monitor_name = subdir.split('/')[-1]
+
+            # Avoid monitor_cron_jobs itseft
+            if monitor_name != 'monitor_cron_jobs':
+
+                log_file_list = [os.path.join(subdir, filename) for filename in filenames]
+
+                # Find the cadence of the monitor
+                delta_time, stdev_time = get_cadence(log_file_list)
+
+                # Identify the most recent log file
+                latest_log, latest_log_time = find_latest(log_file_list)
+
+                # Check to see if we expect a file more recent than the latest
+                missing_file = missing_file_check(delta_time, stdev_time, latest_log)
+                if missing_file:
+                    logging.warning('Expected a more recent {} logfile than {}'
+                                    .format(monitor_name, os.path.basename(latest_log)))
+
+                # Check the file for success/failure
+                result = success_check(latest_log)
+                logging.info('{}: Latest log file indicates {}'.format(monitor_name, result))
+
+                # Add results to the dictionary
+                logfile_status[monitor_name] = {'logname': os.path.basename(latest_log),
+                                                'latest_time': latest_log_time,
+                                                'missing_file': missing_file, 'status': result}
+
+    # Create table of results using Bokeh
+    create_table(logfile_status)
+    logging.info('Cron job status monitor completed successfully.')
+
+
+def success_check(filename):
+    """Parse the given log file and check whether the script execution
+    was successful or not
+
+    Parameters
+    ----------
+    filename : str
+        Name of the log file to parse
+
+    Returns
+    -------
+    execution : str
+        ``success`` or ``failure``
+    """
+    with open(filename, 'r') as file_obj:
+        all_lines = file_obj.readlines()
+    final_line = all_lines[-1]
+    if 'complete' in final_line.lower():
+        execution = 'success'
+    else:
+        execution = 'failure'
+    return execution
+
+
+if __name__ == '__main__':
+
+    module = os.path.basename(__file__).strip('.py')
+    configure_logging(module, production_mode=True)
+
+    status()
diff --git a/jwql/utils/utils.py b/jwql/utils/utils.py
@@ -78,7 +78,6 @@ def get_config():
     settings : dict
         A dictionary that holds the contents of the config file.
     """
-
     with open(os.path.join(__location__, 'config.json'), 'r') as config_file:
         settings = json.load(config_file)