diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index fd0057d..0000000 --- a/.coveragerc +++ /dev/null @@ -1,28 +0,0 @@ -[run] -source = py/desitransfer -omit = - py/desitransfer/_version.py - py/desitransfer/conftest* - py/desitransfer/cython_version* - py/desitransfer/setup_package* - py/desitransfer/*/setup_package* - py/desitransfer/*/*/setup_package* - py/desitransfer/sphinx/* - py/desitransfer/test/* - py/desitransfer/*/test/* - py/desitransfer/*/*/test/* - -[report] -exclude_lines = - # Have to re-enable the standard pragma - pragma: no cover - - # Don't complain about packages we have installed - except ImportError - - # Don't complain if tests don't hit assertions - raise AssertionError - raise NotImplementedError - - # Don't complain about script hooks - def main\(.*\): diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 32724dc..b0b44b1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -17,15 +17,15 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies @@ -43,15 +43,15 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.9'] + python-version: ['3.10'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies @@ -74,15 +74,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: 
['3.9'] + python-version: ['3.10'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies @@ -97,15 +97,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ['3.9'] + python-version: ['3.10'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies @@ -124,15 +124,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ['3.9'] + python-version: ['3.10'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies diff --git a/.readthedocs.yml b/.readthedocs.yml index 3781d94..2cd1f45 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,21 +5,20 @@ # Required version: 2 -# Set the version of Python and other tools you might need build: - os: ubuntu-22.04 + os: "ubuntu-22.04" tools: python: "3.10" # Build documentation in the doc/ directory with Sphinx sphinx: configuration: doc/conf.py - fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub -formats: all +# formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - requirements: doc/rtd-requirements.txt + - requirements: doc/rtd-requirements.txt + diff --git a/MANIFEST.in b/MANIFEST.in index d72c06d..7081da0 100644 --- a/MANIFEST.in +++ 
b/MANIFEST.in @@ -1,12 +1,2 @@ -include LICENSE.rst -include README.rst -include requirements.txt - -graft bin -graft doc -graft etc - -prune build -prune dist -prune htmlcov -prune doc/_build +prune .github +global-exclude .gitignore .readthedocs.yml diff --git a/bin/desi_nightlog_init.sh b/bin/desi_nightlog_init.sh deleted file mode 100755 index 30bf9ed..0000000 --- a/bin/desi_nightlog_init.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# -# Program or script you want to run -# -PROGRAM=${DESITRANSFER}/bin/desi_nightlog_transfer -PRGFILE=$(basename ${PROGRAM}) -PRGDIR=$(dirname ${PROGRAM}) -# -# Command line options for PRGFILE -# -if [[ -z "${NERSC_HOST}" ]]; then - PRGOPTS='--debug --no-permission' -else - PRGOPTS='--debug' -fi -# -# Common initialization code. -# -source ${PRGDIR}/desi_common_init.sh -# -# Main program. -# -case "$1" in - start | stop | status | restart) - $1 - ;; - *) - echo "Usage: $0 {start|stop|status|restart}" - exit 2 - ;; -esac -exit $? diff --git a/bin/desi_nightlog_transfer b/bin/desi_spacewatch_transfer similarity index 57% rename from bin/desi_nightlog_transfer rename to bin/desi_spacewatch_transfer index 3ccae61..8618ef9 100755 --- a/bin/desi_nightlog_transfer +++ b/bin/desi_spacewatch_transfer @@ -1,4 +1,4 @@ #!/usr/bin/env python from sys import exit -from desitransfer.nightlog import main +from desitransfer.spacewatch import main exit(main()) diff --git a/bin/desi_transfer_statistics.sh b/bin/desi_transfer_statistics.sh index b144901..35ceb31 100755 --- a/bin/desi_transfer_statistics.sh +++ b/bin/desi_transfer_statistics.sh @@ -49,6 +49,36 @@ if [[ "${year}" == "2021" ]]; then echo "Total data during fire recovery = ${total_fire_data} KB." fi # +# Statistics during security recovery period. 
20230804 - 20230924 +# +if [[ "${year}" == "2022" ]]; then + number_of_fire_nights=0 + number_of_fire_exposures=0 + total_fire_data=0 + declare -a nights + for d in $(seq 4 31); do + nights+=( ${next_year}08$(printf "%02d" ${d}) ) + done + for d in $(seq 1 24); do + nights+=( ${next_year}09$(printf "%02d" ${d}) ) + done + for night in "${nights[@]}"; do + if [[ -d ${DESI_SPECTRO_DATA}/${night} ]]; then + number_of_fire_nights=$(( number_of_fire_nights + 1 )) + for e in ${DESI_SPECTRO_DATA}/${night}/*; do + echo ${e} + expid=$(basename ${e}) + expid_size=$(du -k -s ${e} | awk '{print $1}') + number_of_fire_exposures=$(( number_of_fire_exposures + 1 )) + total_fire_data=$(( total_fire_data + expid_size )) + done + fi + done + echo "Number of nights during security recovery = ${number_of_fire_nights}." + echo "Number of exposures during security recovery = ${number_of_fire_exposures}." + echo "Total data during security recovery = ${total_fire_data} KB." +fi +# # Nightwatch data. # NIGHTWATCH=${DESI_ROOT}/spectro/nightwatch/kpno @@ -87,4 +117,4 @@ echo "Total nightwatch data = ${total_nightwatch_data} KB." # - We are transferring a small amount of engineering files ~ 10 MB/night. # - We are *not* transferring nightwatch data. # - We are streaming to the database replica at NERSC. This is the biggest share. -# \ No newline at end of file +# diff --git a/bin/desi_tucson_transfer_catchup.sh b/bin/desi_tucson_transfer_catchup.sh new file mode 100755 index 0000000..e88e067 --- /dev/null +++ b/bin/desi_tucson_transfer_catchup.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# +# Parallel copy DESI mirror data, to catch up after outages. +# +if [[ -z "${DESISYNC_HOSTNAME}" ]]; then + echo "ERROR: DESISYNC_HOSTNAME is undefined!" + exit 1 +fi +if [[ -z "${DESI_ROOT}" ]]; then + echo "ERROR: DESI_ROOT is undefined!" 
+ exit 1 +fi +syn="/usr/bin/rsync --archive --verbose --delete --delete-after --no-motd --password-file ${HOME}/.desi" +src=rsync://${DESISYNC_HOSTNAME}/desi +dst=${DESI_ROOT} +log_root=${HOME}/Documents/Logfiles +set -o noglob # rsync filter patterns stored in ${exclude} must reach rsync unexpanded by the shell +for d in engineering/focalplane engineering/focalplane/hwtables \ + spectro/data \ + spectro/redux/daily spectro/redux/daily/exposures spectro/redux/daily/preproc spectro/redux/daily/tiles \ + spectro/nightwatch/kpno spectro/staging/lost+found; do + case ${d} in + engineering/focalplane) priority='nice'; exclude='--exclude archive --exclude hwtables --exclude *.ipynb --exclude .ipynb_checkpoints' ;; + engineering/focalplane/hwtables) priority='nice'; exclude='--include *.csv --exclude *' ;; + spectro/data) priority=''; exclude='--exclude 2018* --exclude 2019* --exclude 2020* --exclude 2021* --exclude 2022*' ;; + spectro/redux/daily) priority=''; exclude='--exclude *.tmp --exclude attic --exclude exposures --exclude preproc --exclude temp --exclude tiles' ;; + spectro/redux/daily/exposures) priority=''; exclude='--exclude *.tmp' ;; + spectro/redux/daily/preproc) priority=''; exclude='--exclude *.tmp --exclude preproc-*.fits --exclude preproc-*.fits.gz' ;; + spectro/redux/daily/tiles) priority=''; exclude='--exclude *.tmp --exclude temp' ;; + *) priority='nice'; exclude='' ;; + esac + log=${log_root}/catchup_$(tr '/' '_' <<<${d}).log + [[ -f ${log} ]] || touch ${log} + echo "${priority} ${syn} ${exclude} ${src}/${d}/ ${dst}/${d}/ &>> ${log} &" + ${priority} ${syn} ${exclude} ${src}/${d}/ ${dst}/${d}/ &>> ${log} & +done + +# log=${log_root}/catchup_engineering_focalplane.log +# [[ -f ${log} ]] || touch ${log} +# nice ${syn} --exclude archive --exclude hwtables --exclude \*.ipynb --exclude .ipynb_checkpoints \ +# ${src}/engineering/focalplane/ ${dst}/engineering/focalplane/ &>> ${log} & + +# log=${log_root}/catchup_engineering_focalplane_hwtables.log +# [[ -f ${log} ]] || touch ${log} +# nice ${syn} --include \*.csv --exclude \* \ +# 
${src}/engineering/focalplane/hwtables/ ${dst}/engineering/focalplane/hwtables/ &>> ${log} & + +# log=${log_root}/catchup_spectro_data.log +# [[ -f ${log} ]] || touch ${log} +# ${syn} --exclude 2018\* --exclude 2019\* --exclude 2020\* --exclude 2021\* --exclude 2022\* \ +# ${src}/spectro/data/ ${dst}/spectro/data/ &>> ${log} & + +# log=${log_root}/catchup_spectro_redux_daily.log +# [[ -f ${log} ]] || touch ${log} +# ${syn} --exclude \*.tmp --exclude attic --exclude exposures --exclude preproc --exclude temp --exclude tiles \ +# ${src}/spectro/redux/daily/ ${dst}/spectro/redux/daily/ &>> ${log} & + +# log=${log_root}/catchup_spectro_redux_daily_exposures.log +# [[ -f ${log} ]] || touch ${log} +# ${syn} --exclude \*.tmp \ +# ${src}/spectro/redux/daily/exposures/ ${dst}/spectro/redux/daily/exposures/ &>> ${log} & + +# log=${log_root}/catchup_spectro_redux_daily_preproc.log +# [[ -f ${log} ]] || touch ${log} +# ${syn} --exclude \*.tmp --exclude preproc-\*.fits --exclude preproc-\*.fits.gz \ +# ${src}/spectro/redux/daily/preproc/ ${dst}/spectro/redux/daily/preproc/ &>> ${log} & + +# log=${log_root}/catchup_spectro_redux_daily_tiles.log +# [[ -f ${log} ]] || touch ${log} +# ${syn} --exclude \*.tmp --exclude temp \ +# ${src}/spectro/redux/daily/tiles/ ${dst}/spectro/redux/daily/tiles/ &>> ${log} & + +# log=${log_root}/catchup_spectro_nightwatch_kpno.log +# [[ -f ${log} ]] || touch ${log} +# nice ${syn} \ +# ${src}/spectro/nightwatch/kpno/ ${dst}/spectro/nightwatch/kpno/ &>> ${log} & + +# log=${log_root}/catchup_spectro_staging_lost+found.log +# [[ -f ${log} ]] || touch ${log} +# nice ${syn} \ +# ${src}/spectro/staging/lost+found/ ${dst}/spectro/staging/lost+found/ &>> ${log} & diff --git a/doc/api.rst b/doc/api.rst index 1e516f7..ceca319 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -14,10 +14,10 @@ desitransfer API .. automodule:: desitransfer.daily :members: -.. automodule:: desitransfer.nightlog +.. automodule:: desitransfer.nightwatch :members: -.. 
automodule:: desitransfer.nightwatch +.. automodule:: desitransfer.spacewatch :members: .. automodule:: desitransfer.status diff --git a/doc/changes.rst b/doc/changes.rst index 73db4df..2ae8847 100644 --- a/doc/changes.rst +++ b/doc/changes.rst @@ -5,13 +5,14 @@ Change Log 1.0.0 (unreleased) ------------------ -* *Planned*: refactor package to deprecate ``setup.py``. -* *Planned*: remove deprecated code such as ``nightlog.py``. - -0.9.3 (unreleased) ------------------- - -* No changes yet. +* Refactor package to deprecate ``setup.py`` (PR `#58`_). +* Remove deprecated code such as ``nightlog.py`` (PR `#58`_). +* Add Spacewatch image download (PR `#58`_). +* Download nightlog data earlier in the day (PR `#58`_). +* Update engineering data transferred to Tucson (PR `#58`_). +* Better logging of and resilience to nightwatch transfer errors (PR `#58`_). + +.. _`#58`: https://github.com/desihub/desitransfer/pull/58 0.9.2 (2023-05-31) ------------------ diff --git a/py/desitransfer/daemon.py b/py/desitransfer/daemon.py index d83dc19..ca67715 100644 --- a/py/desitransfer/daemon.py +++ b/py/desitransfer/daemon.py @@ -8,6 +8,7 @@ """ import datetime as dt import hashlib +import importlib.resources as ir import json import logging import os @@ -15,7 +16,6 @@ import shutil import stat import subprocess as sub -import sys import time import traceback import requests @@ -25,7 +25,6 @@ from logging.handlers import RotatingFileHandler, SMTPHandler from socket import getfqdn from tempfile import TemporaryFile -from pkg_resources import resource_filename from desiutil.log import get_logger from .common import dir_perm, file_perm, rsync, yesterday, empty_rsync, new_exposures, ensure_scratch from .status import TransferStatus @@ -70,7 +69,7 @@ class TransferDaemon(object): """ _link_re = re.compile(r'[0-9]{8}/[0-9]{8}$') _directory = namedtuple('_directory', 'source, staging, destination, hpss, checksum') - _default_configuration = resource_filename('desitransfer', 
'data/desi_transfer_daemon.ini') + _default_configuration = os.path.join(str(ir.files('desitransfer')), 'data', 'desi_transfer_daemon.ini') def __init__(self, options): if options.configuration is None: @@ -344,7 +343,7 @@ def checksum(self, checksum_file, status): log.debug("status.update('%s', '%s', 'checksum', failure=True)", night, exposure) status.update(night, exposure, 'checksum', failure=True) else: - log.warning("No checksum file for %s/%s!", night, exposure) + log.critical("No checksum file for %s/%s!", night, exposure) log.debug("status.update('%s', '%s', 'checksum', failure=True)", night, exposure) status.update(night, exposure, 'checksum', failure=True) diff --git a/py/desitransfer/daily.py b/py/desitransfer/daily.py index d097162..ef67921 100644 --- a/py/desitransfer/daily.py +++ b/py/desitransfer/daily.py @@ -6,13 +6,11 @@ Entry point for :command:`desi_daily_transfer`. """ +import importlib.resources as ir import os import stat import subprocess as sub -import sys -import time from argparse import ArgumentParser -from pkg_resources import resource_filename from .common import dir_perm, file_perm, rsync, stamp from . import __version__ as dtVersion @@ -106,36 +104,48 @@ def permission(self): return status -def _config(): +def _config(timeframe): """Wrap configuration so that module can be imported without environment variables set. + + Parameters + ---------- + timeframe : :class:`str` + Return the set of directories associated with `timeframe`. + + Returns + ------- + :class:`list` + A list of directories to transfer. 
""" - nightlog_include = resource_filename('desitransfer', - 'data/desi_nightlog_transfer_kpno.txt') - nightwatch_exclude = resource_filename('desitransfer', - 'data/desi_nightwatch_transfer_exclude.txt') + nightlog_include = os.path.join(str(ir.files('desitransfer')), + 'data', 'desi_nightlog_transfer_kpno.txt') + # nightwatch_exclude = os.path.join(str(ir.files('desitransfer')), + # 'data', 'desi_nightwatch_transfer_exclude.txt') engineering = os.path.realpath(os.path.join(os.environ['DESI_ROOT'], 'engineering')) spectro = os.path.realpath(os.path.join(os.environ['DESI_ROOT'], 'spectro')) survey = os.path.realpath(os.path.join(os.environ['DESI_ROOT'], 'survey')) - return [DailyDirectory('/data/dts/exposures/lost+found', - os.path.join(spectro, 'staging', 'lost+found'), - dirlinks=True), - DailyDirectory('/software/www2/html/nightlogs', - os.path.join(survey, 'ops', 'nightlogs'), - extra=['--include-from', nightlog_include, '--exclude', '*']), - DailyDirectory('/data/focalplane/calibration', - os.path.join(engineering, 'focalplane', 'calibration')), - DailyDirectory('/data/focalplane/logs/calib_logs', - os.path.join(engineering, 'focalplane', 'logs', 'calib_logs')), - DailyDirectory('/data/focalplane/logs/kpno', - os.path.join(engineering, 'focalplane', 'logs', 'kpno')), - DailyDirectory('/data/focalplane/logs/sequence_logs', - os.path.join(engineering, 'focalplane', 'logs', 'sequence_logs')), - DailyDirectory('/data/focalplane/fp_temp_files', - os.path.join(engineering, 'focalplane', 'hwtables'), - extra=['--include', '*.csv', '--exclude', '*'])] + if timeframe == 'morning': + return [DailyDirectory('/software/www2/html/nightlogs', + os.path.join(survey, 'ops', 'nightlogs'), + extra=['--include-from', nightlog_include, '--exclude', '*']),] + else: + return [DailyDirectory('/data/dts/exposures/lost+found', + os.path.join(spectro, 'staging', 'lost+found'), + dirlinks=True), + DailyDirectory('/data/focalplane/calibration', + os.path.join(engineering, 'focalplane', 
'calibration')), + DailyDirectory('/data/focalplane/logs/calib_logs', + os.path.join(engineering, 'focalplane', 'logs', 'calib_logs')), + DailyDirectory('/data/focalplane/logs/kpno', + os.path.join(engineering, 'focalplane', 'logs', 'kpno')), + DailyDirectory('/data/focalplane/logs/sequence_logs', + os.path.join(engineering, 'focalplane', 'logs', 'sequence_logs')), + DailyDirectory('/data/focalplane/fp_temp_files', + os.path.join(engineering, 'focalplane', 'hwtables'), + extra=['--include', '*.csv', '--exclude', '*'])] def _options(): @@ -148,28 +158,20 @@ def _options(): """ desc = "Transfer non-critical DESI data from KPNO to NERSC." prsr = ArgumentParser(description=desc) - # prsr.add_argument('-b', '--backup', metavar='H', type=int, default=20, - # help='UTC time in hours to trigger HPSS backups (default %(default)s:00 UTC).') prsr.add_argument('-c', '--completion', metavar='FILE', default=os.path.join(os.environ['DESI_ROOT'], 'spectro', 'staging', 'status', 'daily.txt'), help='Signal completion of transfer via FILE (default %(default)s).') prsr.add_argument('-d', '--debug', action='store_true', help='Set log level to DEBUG.') - # prsr.add_argument('-D', '--daemon', action='store_true', - # help='Run in daemon mode. 
If not specificed, the script will run once and exit.') - # prsr.add_argument('-e', '--rsh', metavar='COMMAND', dest='ssh', default='/bin/ssh', - # help="Use COMMAND for remote shell access (default '%(default)s').") prsr.add_argument('-k', '--kill', metavar='FILE', default=os.path.join(os.environ['HOME'], 'stop_desi_transfer'), help="Exit the script when FILE is detected (default %(default)s).") prsr.add_argument('-P', '--no-permission', action='store_false', dest='permission', help='Do not set permissions for DESI collaboration access.') - # prsr.add_argument('-s', '--sleep', metavar='H', type=int, default=24, - # help='In daemon mode, sleep H hours before checking for new data (default %(default)s hours).') - # prsr.add_argument('-S', '--shadow', action='store_true', - # help='Observe the actions of another data transfer script but do not make any changes.') prsr.add_argument('-V', '--version', action='version', version='%(prog)s {0}'.format(dtVersion)) + prsr.add_argument('timeframe', choices=['morning', 'noon'], + help="Run transfer tasks associated with a specific time.") return prsr.parse_args() @@ -181,28 +183,26 @@ def main(): :class:`int` An integer suitable for passing to :func:`sys.exit`. """ + status = 0 options = _options() - if options.debug: - print("DEBUG: os.remove('%s')" % options.completion) - try: - os.remove(options.completion) - except FileNotFoundError: - pass - while True: - if os.path.exists(options.kill): - print("INFO: %s detected, shutting down daily transfer script." 
% options.kill) - return 0 - for d in _config(): - status = d.transfer(permission=options.permission) - if status != 0: - print("ERROR: rsync problem detected for {0.source} -> {0.destination}!".format(d)) - # return status - # if options.daemon: - # time.sleep(options.sleep*60*60) - # else: - # return 0 + if options.timeframe == 'noon': + if options.debug: + print(f"DEBUG: os.remove('{options.completion}')") + try: + os.remove(options.completion) + except FileNotFoundError: + pass + if os.path.exists(options.kill): + print(f"INFO: {options.kill} detected, shutting down daily {options.timeframe} transfer script.") + return 0 + for d in _config(options.timeframe): + s = d.transfer(permission=options.permission) + if s != 0: + print(f"ERROR: rsync problem detected for {d.source} -> {d.destination}!") + status |= s + if options.timeframe == 'noon': if options.debug: - print("DEBUG: daily transfer complete at %s. Writing %s." % (stamp(), options.completion)) + print(f"DEBUG: daily {options.timeframe} transfer complete at {stamp()}. Writing {options.completion}.") with open(options.completion, 'w') as c: c.write(stamp() + "\n") - return 0 + return status diff --git a/py/desitransfer/nightlog.py b/py/desitransfer/nightlog.py deleted file mode 100644 index 5ced1be..0000000 --- a/py/desitransfer/nightlog.py +++ /dev/null @@ -1,209 +0,0 @@ -# Licensed under a 3-clause BSD style license - see LICENSE.rst -# -*- coding: utf-8 -*- -""" -desitransfer.nightlog -===================== - -Bi-directional sync of KPNO and NERSC nightlog data. - -Run as a daemon on ``desi@dtn01.nersc.gov``. -""" -import logging -import os -import stat -import subprocess as sub -import time -from argparse import ArgumentParser -from logging.handlers import RotatingFileHandler, SMTPHandler -from pkg_resources import resource_filename -from socket import getfqdn -from tempfile import TemporaryFile -from desiutil.log import get_logger -from .daemon import _popen -from .common import rsync, today -from . 
import __version__ as dtVersion - - -log = None - - -def _options(): - """Parse command-line options for :command:`desi_nightlog_transfer`. - - Returns - ------- - :class:`argparse.Namespace` - The parsed command-line options. - """ - desc = "Transfer DESI nightlog data files." - prsr = ArgumentParser(description=desc) - # prsr.add_argument('-B', '--no-backup', action='store_false', dest='backup', - # help="Skip NERSC HPSS backups.") - # prsr.add_argument('-c', '--configuration', metavar='FILE', - # help="Read configuration from FILE.") - prsr.add_argument('-d', '--debug', action='store_true', - help='Set log level to DEBUG.') - prsr.add_argument('-k', '--kill', metavar='FILE', - default=os.path.join(os.environ['HOME'], 'stop_desi_transfer'), - help="Exit the script when FILE is detected (default %(default)s).") - prsr.add_argument('-P', '--no-permission', action='store_false', dest='permission', - help='Do not set permissions for DESI collaboration access.') - # prsr.add_argument('-S', '--shadow', action='store_true', - # help='Observe the actions of another data transfer script but do not make any changes.') - prsr.add_argument('-s', '--sleep', metavar='M', type=int, default=5, - help='Sleep M minutes before checking for new data (default %(default)s minutes).') - prsr.add_argument('-t', '--test', action='store_true', dest='test', - help='Test mode. Do not transfer any files.') - prsr.add_argument('-V', '--version', action='version', - version='%(prog)s {0}'.format(dtVersion)) - return prsr.parse_args() - - -def _configure_log(debug): - """Re-configure the default logger returned by ``desiutil.log``. - - Parameters - ---------- - debug : :class:`bool` - If ``True`` set the log level to ``DEBUG``. 
- """ - global log - # conf = self.conf['logging'] - log = get_logger(timestamp=True) - h = log.parent.handlers[0] - handler = RotatingFileHandler(os.path.join(os.environ['DESI_ROOT'], 'survey', 'ops', 'nightlogs', 'desi_nightlog_transfer.log'), - maxBytes=100000000, - backupCount=100) - handler.setFormatter(h.formatter) - log.parent.removeHandler(h) - log.parent.addHandler(handler) - if debug: - log.setLevel(logging.DEBUG) - email_from = os.environ['USER'] + '@' + getfqdn() - handler2 = SMTPHandler('localhost', email_from, ['desi-alarms-transfer@desi.lbl.gov', ], - 'Critical error reported by desi_nightlog_transfer!') - fmt = """Greetings, - -At %(asctime)s, desi_nightlog_transfer failed with this message: - -%(message)s - -Kia ora koutou, -The DESI Collaboration Account -""" - formatter2 = logging.Formatter(fmt, datefmt='%Y-%m-%d %H:%M:%S %Z') - handler2.setFormatter(formatter2) - handler2.setLevel(logging.CRITICAL) - log.parent.addHandler(handler2) - - -def main(): - """Entry point for :command:`desi_nightlog_transfer`. - - Returns - ------- - :class:`int` - An integer suitable for passing to :func:`sys.exit`. - """ - options = _options() - _configure_log(options.debug) - errcount = 0 - wait = options.sleep*60 - kpno_source = '/software/www2/html/nightlogs' - nersc_source = os.path.join(os.environ['DESI_ROOT'], 'survey', 'ops', 'nightlogs') - kpno_include = resource_filename('desitransfer', 'data/desi_nightlog_transfer_kpno.txt') - nersc_include = resource_filename('desitransfer', 'data/desi_nightlog_transfer_nersc.txt') - while True: - log.info('Starting nightlog transfer loop; desitransfer version = %s.', - dtVersion) - if os.path.exists(options.kill): - log.info("%s detected, shutting down nightlog daemon.", - options.kill) - return 0 - night = today() - t0 = time.time() - # - # First check if there is any data for tonight yet. 
- # - log.info('Checking for nightlog data from %s.', night) - cmd = ['/bin/rsync', 'dts:{0}/'.format(kpno_source)] - log.debug(' '.join(cmd)) - status, out, err = _popen(cmd) - kpno_found = False - if status != '0': - errcount += 1 - log.error('Getting KPNO file list for %s; trying again in %d minutes.', night, options.sleep) - time.sleep(wait) - continue - for line in out.split('\n'): - if line.endswith(night): - log.info(line) - kpno_found = True - nersc_found = os.path.exists(os.path.join(nersc_source, night)) - if not (kpno_found or nersc_found): - log.info('No KPNO or NERSC nightlog data found for %s; trying again in %d minutes.', night, options.sleep) - time.sleep(wait) - continue - # - # Sync per-night directory. - # - if kpno_found: - cmd = rsync(os.path.join(kpno_source, night), - os.path.join(nersc_source, night), test=options.test) - cmd.insert(cmd.index('--omit-dir-times') + 1, '--include-from') - cmd.insert(cmd.index('--include-from') + 1, kpno_include) - cmd.insert(cmd.index(kpno_include) + 1, '--exclude') - cmd.insert(cmd.index('--exclude') + 1, '*') - log.info('Syncing %s KPNO -> NERSC.', night) - log.debug(' '.join(cmd)) - status, out, err = _popen(cmd) - if status != '0': - errcount += 1 - log.error('Syncing %s KPNO -> NERSC.', night) - if nersc_found: - cmd = rsync(os.path.join(nersc_source, night), - os.path.join(kpno_source, night), test=options.test, - reverse=True) - cmd.insert(cmd.index('--omit-dir-times') + 1, '--include-from') - cmd.insert(cmd.index('--include-from') + 1, nersc_include) - cmd.insert(cmd.index(nersc_include) + 1, '--exclude') - cmd.insert(cmd.index('--exclude') + 1, '*') - log.info('Syncing %s NERSC -> KPNO.', night) - log.debug(' '.join(cmd)) - status, out, err = _popen(cmd) - if status != '0': - errcount += 1 - log.error('Syncing %s NERSC -> KPNO.', night) - # - # Correct the permissions. 
- # - if options.permission: - nightdir = os.path.join(nersc_source, night) - if os.path.exists(nightdir): - log.info('Fixing permissions for DESI.') - cmd = ['fix_permissions.sh', nightdir] - log.debug(' '.join(cmd)) - if options.test: - status = '0' - else: - status, out, err = _popen(cmd) - if status != '0': - errcount += 1 - log.error('Fixing permissions for %s.', nightdir) - else: - log.info('No data yet for night %s.', night) - else: - log.info("Skipping permission changes at user request.") - # - # Check for accumulated errors. - # - if errcount > 10: - log.critical('Transfer error count exceeded, check logs!') - return 1 - # - # If all that took less than sleep.wait minutes, sleep a bit. - # - dt = time.time() - t0 - if dt < wait: - log.info('Sleeping for a bit.') - time.sleep(wait - dt) diff --git a/py/desitransfer/nightwatch.py b/py/desitransfer/nightwatch.py index a682bb8..e28d4d3 100644 --- a/py/desitransfer/nightwatch.py +++ b/py/desitransfer/nightwatch.py @@ -4,37 +4,32 @@ desitransfer.nightwatch ======================= -Sync KPNO nightwatch. This module will hopefully be integrated into -the standard transfer daemon. +Sync KPNO nightwatch. Due to differences in timing and directory structure, +this is kept separate from the raw data transfer daemon. -Run as desi@dtn01.nersc.gov. +A cronjob running as desi@dtn01.nersc.gov ensures that this daemon is running. Catchup on a specific night:: NIGHT=20200124 && rsync -rlvt --exclude-from ${DESITRANSFER}/py/desitransfer/data/desi_nightwatch_transfer_exclude.txt dts:/exposures/nightwatch/${NIGHT}/ /global/cfs/cdirs/desi/spectro/nightwatch/kpno/${NIGHT}/ - -Typical startup sequence (bash shell):: +By-hand startup sequence (bash shell):: source /global/common/software/desi/desi_environment.sh datatran module load desitransfer nohup nice -19 ${DESITRANSFER}/bin/desi_nightwatch_transfer &> /dev/null & tail -f ${DESI_ROOT}/spectro/nightwatch/desi_nightwatch_transfer.log -The above sequence is for starting by hand. 
A cronjob on dtn01 should ensure -that the script is running. """ +import importlib.resources as ir import logging import os import re import stat -import subprocess as sub import time from argparse import ArgumentParser from logging.handlers import RotatingFileHandler, SMTPHandler -from pkg_resources import resource_filename from socket import getfqdn -from tempfile import TemporaryFile from desiutil.log import get_logger from .common import rsync, today, idle_time from .daemon import _popen @@ -55,19 +50,15 @@ def _options(): """ desc = "Transfer DESI nightwatch data files." prsr = ArgumentParser(description=desc) - # prsr.add_argument('-B', '--no-backup', action='store_false', dest='backup', - # help="Skip NERSC HPSS backups.") - # prsr.add_argument('-c', '--configuration', metavar='FILE', - # help="Read configuration from FILE.") prsr.add_argument('-d', '--debug', action='store_true', help='Set log level to DEBUG.') + prsr.add_argument('-e', '--alert-after-errors', dest='maxerrors', metavar='N', type=int, default=10, + help='Send an alert after N serious transfer errors (default %(default)s).') prsr.add_argument('-k', '--kill', metavar='FILE', default=os.path.join(os.environ['HOME'], 'stop_desi_transfer'), help="Exit the script when FILE is detected (default %(default)s).") prsr.add_argument('-P', '--no-permission', action='store_false', dest='permission', help='Do not set permissions for DESI collaboration access.') - # prsr.add_argument('-S', '--shadow', action='store_true', - # help='Observe the actions of another data transfer script but do not make any changes.') prsr.add_argument('-s', '--sleep', metavar='M', type=int, default=1, help='Sleep M minutes before checking for new data (default %(default)s minutes).') prsr.add_argument('-V', '--version', action='version', @@ -84,7 +75,6 @@ def _configure_log(debug): If ``True`` set the log level to ``DEBUG``. 
""" global log - # conf = self.conf['logging'] log = get_logger(timestamp=True) h = log.parent.handlers[0] handler = RotatingFileHandler(os.path.join(os.environ['DESI_ROOT'], 'spectro', 'nightwatch', 'desi_nightwatch_transfer.log'), @@ -100,7 +90,7 @@ def _configure_log(debug): 'Critical error reported by desi_nightwatch_transfer!') fmt = """Greetings, -At %(asctime)s, desi_nightwatch_transfer failed with this message: +At %(asctime)s, desi_nightwatch_transfer reported this message: %(message)s @@ -128,9 +118,8 @@ def main(): source = '/exposures/nightwatch' basedir = os.path.join(os.environ['DESI_ROOT'], 'spectro', 'nightwatch') kpnodir = os.path.join(basedir, 'kpno') - # syncdir = os.path.join(basedir, 'sync') - exclude = resource_filename('desitransfer', 'data/desi_nightwatch_transfer_exclude.txt') - include = resource_filename('desitransfer', 'data/desi_nightwatch_transfer_include.txt') + exclude = os.path.join(str(ir.files('desitransfer')), 'data', 'desi_nightwatch_transfer_exclude.txt') + include = os.path.join(str(ir.files('desitransfer')), 'data', 'desi_nightwatch_transfer_include.txt') with open(include) as i: top_level_files = i.read().strip().split('\n') log.debug(', '.join(top_level_files)) @@ -160,13 +149,15 @@ def main(): status, out, err = _popen(cmd) found = False if status != '0': - errcount += 1 - log.error('Getting file list for %s; trying again in %d minutes.', night, options.sleep) + log.error('Error detected while syncing the list of nights; trying again in %d minutes.', options.sleep) + log.error("STATUS = %s", status) + log.error("STDOUT = \n%s", out) + log.error("STDERR = \n%s", err) time.sleep(wait) continue for line in out.split('\n'): if re.match(nightline.format(night=night), line) is not None: - log.info(line) + log.debug(line) found = True break if not found: @@ -184,8 +175,14 @@ def main(): log.debug(' '.join(cmd)) status, out, err = _popen(cmd) if status != '0': - errcount += 1 - log.error('Syncing %s.', night) + if 'file has 
vanished' in err: + log.warning("File vanished while syncing %s; not serious.") + else: + errcount += 1 + log.error('Unknown error detected while syncing %s.', night) + log.error("STATUS = %s", status) + log.error("STDOUT = \n%s", out) + log.error("STDERR = \n%s", err) # # Correct the permissions. # @@ -197,7 +194,10 @@ def main(): status, out, err = _popen(cmd) if status != '0': errcount += 1 - log.error('Fixing permissions for %s.', nightdir) + log.error('Error detected while fixing permissions for %s.', nightdir) + log.error("STATUS = %s", status) + log.error("STDOUT = \n%s", out) + log.error("STDERR = \n%s", err) else: log.info('No data yet for night %s.', night) else: @@ -213,8 +213,10 @@ def main(): log.debug(' '.join(cmd)) status, out, err = _popen(cmd) if status != '0': - errcount += 1 - log.error('Syncing top level html files.') + log.error('Error detected while syncing top level html files.') + log.error("STATUS = %s", status) + log.error("STDOUT = \n%s", out) + log.error("STDERR = \n%s", err) # # Hack: just add world read to those top level files since fix_permissions.sh # is recursive and we don't want to redo all nights. @@ -225,11 +227,14 @@ def main(): top_level_files_mode) os.chmod(os.path.join(kpnodir, filename), top_level_files_mode) # - # Check for accumulated errors. + # Check for accumulated errors. Don't exit, but do send an alert email. # - if errcount > 10: - log.critical('Transfer error count exceeded, shutting down.') - return 1 + if errcount > options.maxerrors: + log.critical('More than %d serious transfer errors detected, check the logs!', errcount) + # + # Reset the count so we don't get email every minute. + # + errcount = 0 # # If all that took less than options.sleep minutes, sleep a bit. 
# diff --git a/py/desitransfer/spacewatch.py b/py/desitransfer/spacewatch.py new file mode 100644 index 0000000..5b5bd1f --- /dev/null +++ b/py/desitransfer/spacewatch.py @@ -0,0 +1,187 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +# -*- coding: utf-8 -*- +""" +desitransfer.spacewatch +======================= + +Download Spacewatch data from a server at KPNO. + +Notes +----- +* Spacewatch data rolls over at 00:00 UTC = 17:00 MST. +* The data relevant to the previous night, say 20231030, would be downloaded + on the morning of 20231031. +* Therefore to obtain all data of interest, just download the files that + have already appeared in 2023/10/31/ (Spacewatch directory structure) + the morning after DESI night 20231030. +""" +import datetime +import os +import re +from argparse import ArgumentParser +from html.parser import HTMLParser +try: + utc = datetime.UTC +except AttributeError: + # datetime.UTC is in Python 3.11 + import pytz + utc = pytz.UTC +import requests +from desiutil.log import get_logger, DEBUG +from . import __version__ as dtVersion +from .common import yesterday + + +log = None + + +class SpacewatchHTMLParser(HTMLParser): + """Extract JPG files from an HTML index. + """ + def __init__(self, *args, **kwargs): + super(SpacewatchHTMLParser, self).__init__(*args, **kwargs) + self.jpg_re = re.compile(r'[0-9]{8}_[0-9]{6}\.jpg') + self.jpg_files = list() + + def handle_starttag(self, tag, attrs): + """Process HTML tags, in this case targeting anchor tags. + """ + if tag == 'a': + href = [a[1] for a in attrs if a[0] == 'href'] + if href: + if self.jpg_re.match(href[0]) is not None: + self.jpg_files.append(href[0]) + + +def jpg_list(index): + """Obtain a list of JPEG files from an HTML index. + + Parameters + ---------- + index : :class:`str` + The URL of an HTML index. + + Returns + ------- + :class:`list` + A list of JPEG files found in `index`. The `index` URL is attached + to the file names. 
+ """ + try: + r = requests.get(index) + except (requests.RequestException, requests.ConnectionError, requests.HTTPError) as e: + log.critical(e.args[0]) + return [] + if r.status_code == 200: + parser = SpacewatchHTMLParser() + parser.feed(r.content.decode(r.headers['Content-Type'].split('=')[1])) + return [index + j for j in parser.jpg_files] + else: + log.critical("Unexpected status when listing JPEG files: %d!", r.status_code) + return [] + + +def download_jpg(files, destination, overwrite=False, test=False): + """Download `files` to `destination`. + + Parameters + ---------- + files : :class:`list` + A list of URLs to download. + destination : :class:`str` + A local directory to hold the files. + overwrite : :class:`bool`, optional + If ``True``, overwrite any existing files. + test : :class:`bool`, optional + If ``True``, do not download any files. + + Returns + ------- + :class:`int` + The number of files downloaded. + """ + downloaded = 0 + if not test and not os.path.isdir(destination): + log.debug("os.makedirs('%s')", destination) + os.makedirs(destination) + for jpg in files: + base_jpg = jpg.split('/')[-1] + dst_jpg = os.path.join(destination, base_jpg) + if os.path.exists(dst_jpg) and not overwrite: + # Overwrite? + log.debug("Skipping existing file: %s.", dst_jpg) + pass + else: + log.debug("r = requests.get('%s')", jpg) + if not test: + r = requests.get(jpg) + if r.status_code == 200: + downloaded += 1 + timestamp = int(datetime.datetime.strptime(r.headers['Last-Modified'], '%a, %d %b %Y %H:%M:%S %Z').replace(tzinfo=utc).timestamp()) + with open(dst_jpg, 'wb') as j: + j.write(r.content) + os.utime(dst_jpg, (timestamp, timestamp)) + return downloaded + + +def _options(): + """Parse command-line options for :command:`desi_spacewatch_transfer`. + + Returns + ------- + :class:`argparse.Namespace` + The parsed command-line options. + """ + desc = "Transfer Spacewatch data files." 
+ prsr = ArgumentParser(description=desc) + prsr.add_argument('-d', '--debug', action='store_true', + help='Set log level to DEBUG.') + prsr.add_argument('-D', '--date', action='store', metavar='YYYY/MM/DD', + help='Download files for a specific date instead of today.') + prsr.add_argument('-o', '--overwrite', action='store_true', + help='Overwrite any existing files.') + prsr.add_argument('-s', '--server', metavar='SERVER', + default=os.getenv('SPACEWATCH_SERVER', 'SPACEWATCH_SERVER'), + help='Set the Spacewatch server name to SERVER (default "%(default)s").') + prsr.add_argument('-t', '--test', action='store_true', + help='Do not actually download any files; implies --debug.') + prsr.add_argument('-V', '--version', action='version', + version='%(prog)s {0}'.format(dtVersion)) + prsr.add_argument('destination', metavar='DIR', help='Download files to DIR.') + return prsr.parse_args() + + +def main(): + """Entry point for :command:`desi_spacewatch_transfer`. + + Returns + ------- + :class:`int` + An integer suitable for passing to :func:`sys.exit`. 
+ """ + global log + options = _options() + if options.debug or options.test: + log = get_logger(DEBUG) + else: + log = get_logger() + if options.server == 'SPACEWATCH_SERVER': + log.critical("Spacewatch server name is not set!") + return 1 + spacewatch_root = f'http://{options.server}/allsky-all/images/cropped/' + if options.date is not None: + today = options.date + else: + today = datetime.date.today().strftime("%Y/%m/%d") + y = yesterday() + ystrdy = f"{y[0:4]}/{y[4:6]}/{y[6:8]}" + spacewatch_today = spacewatch_root + today + '/' + spacewatch_yesterday = spacewatch_root + ystrdy + '/' + n_files = download_jpg(jpg_list(spacewatch_today), os.path.join(options.destination, today), + overwrite=options.overwrite, test=options.test) + log.debug("%d files downloaded for %s.", n_files, today) + if options.date is None: + n_files = download_jpg(jpg_list(spacewatch_yesterday), os.path.join(options.destination, ystrdy), + overwrite=options.overwrite, test=options.test) + log.debug("%d files downloaded for %s.", n_files, ystrdy) + return 0 diff --git a/py/desitransfer/status.py b/py/desitransfer/status.py index 6e82637..7d2a57f 100644 --- a/py/desitransfer/status.py +++ b/py/desitransfer/status.py @@ -6,14 +6,13 @@ Entry point for :command:`desi_transfer_status`. """ +import importlib.resources as ir import json import os import shutil -import sys import time from datetime import date from argparse import ArgumentParser -from pkg_resources import resource_filename from desiutil.log import log, DEBUG from . import __version__ as dtVersion @@ -47,8 +46,8 @@ def __init__(self, directory, install=False, year=None): log.debug("os.makedirs('%s', exist_ok=True)", self.directory) os.makedirs(self.directory, exist_ok=True) for ext in ('html', 'js'): - src = resource_filename('desitransfer', - 'data/desi_transfer_status.' + ext) + src = os.path.join(str(ir.files('desitransfer')), + 'data', 'desi_transfer_status.' 
+ ext) if ext == 'html': log.debug("shutil.copyfile('%s', '%s')", src, os.path.join(self.directory, 'index.html')) diff --git a/py/desitransfer/test/test_common.py b/py/desitransfer/test/test_common.py index d5653df..30d2ea1 100644 --- a/py/desitransfer/test/test_common.py +++ b/py/desitransfer/test/test_common.py @@ -3,7 +3,6 @@ """Test desitransfer.common. """ from datetime import datetime, timedelta -import os import unittest from unittest.mock import patch from tempfile import TemporaryDirectory diff --git a/py/desitransfer/test/test_daemon.py b/py/desitransfer/test/test_daemon.py index b91964e..e7d41a4 100644 --- a/py/desitransfer/test/test_daemon.py +++ b/py/desitransfer/test/test_daemon.py @@ -3,6 +3,7 @@ """Test desitransfer.daemon. """ import datetime +import importlib.resources as ir import json import logging import os @@ -12,7 +13,6 @@ import requests from tempfile import TemporaryDirectory from unittest.mock import call, patch, MagicMock -from pkg_resources import resource_filename from ..daemon import (_options, TransferDaemon, _popen, log, verify_checksum, lock_directory, unlock_directory, rsync_night) @@ -481,7 +481,7 @@ def test_TransferDaemon_exposure_checksum_missing(self, mock_cl, mock_log, mock_ mock_lock.assert_called_once_with('/desi/root/spectro/staging/raw/20190703/00000127', False) mock_exists.assert_has_calls([call('/desi/root/spectro/staging/raw/20190703/00000127/checksum-00000127.sha256sum')]) # mock_cksum.assert_called_once_with('/desi/root/spectro/staging/raw/20190703/00000127/checksum-00000127.sha256sum') - mock_log.warning.assert_called_once_with("No checksum file for %s/%s!", '20190703', '00000127') + mock_log.critical.assert_called_once_with("No checksum file for %s/%s!", '20190703', '00000127') mock_status.update.assert_has_calls([call('20190703', '00000127', 'rsync'), call('20190703', '00000127', 'checksum', failure=True)]) mock_mv.assert_called_once_with('/desi/root/spectro/staging/raw/20190703/00000127', 
'/desi/root/spectro/data/20190703') @@ -713,8 +713,8 @@ def test_TransferDaemon_catchup(self, mock_cl, mock_log, mock_status, mock_isdir mock_popen.return_value = ('0', r1, '') transfer.catchup(c[0], '20190703', mock_status) mock_rsync.assert_called_once_with('/data/dts/exposures/raw', '/desi/root/spectro/data', '20190703', False) - mock_log.warning.assert_has_calls([call('New files detected in %s!', '20190703'), - call("No checksum file for %s/%s!", '20190703', '00001234'), + mock_log.warning.assert_called_once_with('New files detected in %s!', '20190703') + mock_log.critical.assert_has_calls([call("No checksum file for %s/%s!", '20190703', '00001234'), call("No checksum file for %s/%s!", '20190703', '00001235')], any_order=True) mock_log.debug.assert_has_calls([call("verify_checksum('%s')", '/desi/root/spectro/data/20190703/00001234/checksum-00001234.sha256sum'), call("status.update('%s', '%s', 'checksum', failure=True)", '20190703', '00001234'), @@ -1041,7 +1041,7 @@ def test_popen(self, mock_popen, mock_temp): def test_verify_checksum(self): """Test checksum verification. 
""" - c = resource_filename('desitransfer.test', 't/t.sha256sum') + c = os.path.join(str(ir.files('desitransfer.test')), 't', 't.sha256sum') d = os.path.dirname(c) with patch('os.listdir') as mock_listdir: mock_listdir.return_value = ['t.sha256sum', 'test_file_1.txt', 'test_file_2.txt'] diff --git a/py/desitransfer/test/test_daily.py b/py/desitransfer/test/test_daily.py index f472f42..837ebcf 100644 --- a/py/desitransfer/test/test_daily.py +++ b/py/desitransfer/test/test_daily.py @@ -33,13 +33,24 @@ def test_config(self): """ with patch.dict('os.environ', {'DESI_ROOT': '/desi/root'}): - c = _config() + c = _config('noon') self.assertEqual(c[0].source, '/data/dts/exposures/lost+found') self.assertEqual(c[0].destination, os.path.join(os.environ['DESI_ROOT'], 'spectro', 'staging', 'lost+found')) self.assertTrue(c[0].dirlinks) self.assertFalse(c[1].dirlinks) + def test_config_morning(self): + """Test transfer directory configuration at a different time. + """ + with patch.dict('os.environ', + {'DESI_ROOT': '/desi/root'}): + c = _config('morning') + self.assertEqual(c[0].source, '/software/www2/html/nightlogs') + self.assertEqual(c[0].destination, os.path.join(os.environ['DESI_ROOT'], + 'survey', 'ops', 'nightlogs')) + self.assertFalse(c[0].dirlinks) + def test_options(self): """Test command-line arguments. 
""" @@ -47,7 +58,8 @@ def test_options(self): {'DESI_ROOT': '/desi/root'}): with patch.object(sys, 'argv', ['desi_daily_transfer', '--debug', '--kill', - os.path.expanduser('~/stop_daily_transfer')]): + os.path.expanduser('~/stop_daily_transfer'), + 'noon']): options = _options() self.assertTrue(options.permission) self.assertEqual(options.completion, diff --git a/py/desitransfer/test/test_nightlog.py b/py/desitransfer/test/test_nightlog.py deleted file mode 100644 index 602c3cf..0000000 --- a/py/desitransfer/test/test_nightlog.py +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed under a 3-clause BSD style license - see LICENSE.rst -# -*- coding: utf-8 -*- -"""Test desitransfer.nightlog. -""" -# import datetime -import logging -import os -# import shutil -import sys -import unittest -from tempfile import TemporaryDirectory -from unittest.mock import call, patch, MagicMock -from pkg_resources import resource_filename -from ..nightlog import (_options, _configure_log) - - -class TestNightlog(unittest.TestCase): - """Test desitransfer.nightlog. - """ - - @classmethod - def setUpClass(cls): - pass - - @classmethod - def tearDownClass(cls): - pass - - def setUp(self): - """Create a temporary directory to simulate SCRATCH. - """ - self.tmp = TemporaryDirectory() - - def tearDown(self): - """Clean up temporary directory. - """ - self.tmp.cleanup() - - def test_options(self): - """Test command-line arguments. - """ - with patch.object(sys, 'argv', ['desi_nightlog_transfer', '--debug']): - options = _options() - self.assertTrue(options.debug) - self.assertEqual(options.kill, - os.path.join(os.environ['HOME'], - 'stop_desi_transfer')) - - @patch('desitransfer.nightlog.SMTPHandler') - @patch('desitransfer.nightlog.RotatingFileHandler') - @patch('desitransfer.nightlog.get_logger') - @patch('desitransfer.nightlog.log') # Needed to restore the module-level log object after test. - def test_configure_log(self, mock_log, gl, rfh, smtp): - """Test logging configuration. 
- """ - with patch.dict('os.environ', {'SCRATCH': self.tmp.name, - 'DESI_ROOT': '/desi/root'}): - with patch.object(sys, 'argv', ['desi_nightlog_transfer', '--debug']): - options = _options() - _configure_log(options) - rfh.assert_called_once_with('/desi/root/survey/ops/nightlogs/desi_nightlog_transfer.log', - backupCount=100, maxBytes=100000000) - gl.assert_called_once_with(timestamp=True) - gl().setLevel.assert_called_once_with(logging.DEBUG) diff --git a/py/desitransfer/test/test_nightwatch.py b/py/desitransfer/test/test_nightwatch.py index 65e8076..a6ef27c 100644 --- a/py/desitransfer/test/test_nightwatch.py +++ b/py/desitransfer/test/test_nightwatch.py @@ -2,15 +2,12 @@ # -*- coding: utf-8 -*- """Test desitransfer.nightwatch. """ -# import datetime import logging import os -# import shutil import sys import unittest from tempfile import TemporaryDirectory -from unittest.mock import call, patch, MagicMock -from pkg_resources import resource_filename +from unittest.mock import call, patch from ..nightwatch import (_options, _configure_log) diff --git a/py/desitransfer/test/test_spacewatch.py b/py/desitransfer/test/test_spacewatch.py new file mode 100644 index 0000000..984f703 --- /dev/null +++ b/py/desitransfer/test/test_spacewatch.py @@ -0,0 +1,229 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +# -*- coding: utf-8 -*- +"""Test desitransfer.spacewatch. +""" +import logging +import os +import sys +import unittest +from tempfile import TemporaryDirectory +from unittest.mock import call, patch, Mock +from ..spacewatch import (_options, jpg_list, download_jpg) + + +class TestSpacewatch(unittest.TestCase): + """Test desitransfer.spacewatch. + """ + + @classmethod + def setUpClass(cls): + pass + + @classmethod + def tearDownClass(cls): + pass + + def setUp(self): + """Create a temporary directory to simulate SCRATCH. + """ + self.tmp = TemporaryDirectory() + + def tearDown(self): + """Clean up temporary directory. 
+ """ + self.tmp.cleanup() + + def test_options(self): + """Test command-line arguments. + """ + with patch.object(sys, 'argv', ['desi_spacewatch_transfer', '--debug', '/desi/external/spacewatch']): + with patch.dict('os.environ', {'SPACEWATCH_SERVER': 'www.example.com'}): + options = _options() + self.assertTrue(options.debug) + self.assertEqual(options.server, 'www.example.com') + + def test_options_bad_env(self): + """Test command-line arguments with missing env variable. + """ + with patch.object(sys, 'argv', ['desi_spacewatch_transfer', '--debug', '/desi/external/spacewatch']): + options = _options() + self.assertTrue(options.debug) + self.assertEqual(options.server, 'SPACEWATCH_SERVER') + + @patch('desitransfer.spacewatch.requests') + def test_jpg_files(self, mock_requests): + """Test parsing an index.html file. + """ + mock_contents = Mock() + mock_contents.headers = {'Content-Type': 'text/html;charset=ISO-8859-1'} + mock_contents.status_code = 200 + mock_contents.content = """ + + + Index of /allsky-all/images/cropped/2023/10/31 + + +

Index of /allsky-all/images/cropped/2023/10/31

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ [ICO] + + Name + + Last modified + + Size + + Description +
+
+
+ [PARENTDIR] + + Parent Directory +   -  
+ [IMG] + + 20231031_000005.jpg + 2023-10-31 00:00 142K 
+ [IMG] + + 20231031_000205.jpg + 2023-10-31 00:02 143K 
+ [IMG] + + 20231031_000405.jpg + 2023-10-31 00:04 138K 
+ [IMG] + + 20231031_000605.jpg + 2023-10-31 00:06 142K 
+
+
+ +""".encode('ISO-8859-1') + mock_requests.get.return_value = mock_contents + jpg_files = jpg_list('http://foo.bar/') + mock_requests.get.assert_called_once_with('http://foo.bar/') + self.assertListEqual(jpg_files, ['http://foo.bar/20231031_000005.jpg', + 'http://foo.bar/20231031_000205.jpg', + 'http://foo.bar/20231031_000405.jpg', + 'http://foo.bar/20231031_000605.jpg']) + + @patch('desitransfer.spacewatch.log') + @patch('desitransfer.spacewatch.requests') + def test_jpg_files_bad_status(self, mock_requests, mock_log): + """Test bad HTTP status on jpg file list. + """ + mock_contents = Mock() + mock_contents.headers = {'Content-Type': 'text/html;charset=ISO-8859-1'} + mock_contents.status_code = 404 + mock_contents.content = b'' + mock_requests.get.return_value = mock_contents + jpg_files = jpg_list('http://foo.bar/') + self.assertListEqual(jpg_files, []) + mock_log.critical.assert_called_once_with("Unexpected status when listing JPEG files: %d!", 404) + + @patch('desitransfer.spacewatch.log') + @patch('desitransfer.spacewatch.requests') + def test_jpg_files_request_exception(self, mock_requests, mock_log): + """Test requests exceptions for jpg list. + """ + # mock_contents = Mock() + # mock_contents.headers = {'Content-Type': 'text/html;charset=ISO-8859-1'} + # mock_contents.status_code = 404 + # mock_contents.content = b'' + mock_requests.RequestException = Exception + mock_requests.ConnectionError = Exception + mock_requests.HTTPError = Exception + msg = "Exception thrown when attempting to access file list!" + mock_requests.get.side_effect = mock_requests.ConnectionError(msg) + jpg_files = jpg_list('http://foo.bar/') + self.assertListEqual(jpg_files, []) + mock_log.critical.assert_called_once_with(msg) + + @patch('desitransfer.spacewatch.log') + @patch('os.utime') + @patch('desitransfer.spacewatch.requests') + @patch('os.path.exists') + def test_download_jpg(self, mock_exists, mock_requests, mock_utime, mock_log): + """Test downloads of JPEG files. 
+ """ + mock_exists.side_effect = lambda x: x == os.path.join(destination, 'baz', '20231031_000005.jpg') + # mock_exists.return_value = False + mock_contents = Mock() + mock_contents.headers = {'Last-Modified': 'Mon, 30 Oct 2023 00:00:24 GMT'} + mock_contents.status_code = 200 + mock_contents.content = b"""123456789""" + mock_requests.get.return_value = mock_contents + files = ['http://foo.bar/20231031_000005.jpg', + 'http://foo.bar/20231031_000205.jpg', + 'http://foo.bar/20231031_000405.jpg', + 'http://foo.bar/20231031_000605.jpg'] + destination = self.tmp.name + n = download_jpg(files, destination + '/baz') + self.assertEqual(n, 3) + mock_exists.assert_has_calls([call(os.path.join(destination, 'baz', '20231031_000005.jpg')), + call(os.path.join(destination, 'baz', '20231031_000205.jpg')), + call(os.path.join(destination, 'baz', '20231031_000405.jpg')), + call(os.path.join(destination, 'baz', '20231031_000605.jpg'))]) + mock_requests.get.assert_has_calls([call('http://foo.bar/20231031_000205.jpg'), + call('http://foo.bar/20231031_000405.jpg'), + call('http://foo.bar/20231031_000605.jpg')]) + mock_utime.assert_has_calls([call(os.path.join(destination, 'baz', '20231031_000205.jpg'), (1698624024, 1698624024)), + call(os.path.join(destination, 'baz', '20231031_000405.jpg'), (1698624024, 1698624024)), + call(os.path.join(destination, 'baz', '20231031_000605.jpg'), (1698624024, 1698624024))]) + mock_log.debug.assert_has_calls([call("os.makedirs('%s')", os.path.join(destination, 'baz')), + call("Skipping existing file: %s.", + os.path.join(destination, 'baz', '20231031_000005.jpg')), + call("r = requests.get('%s')", 'http://foo.bar/20231031_000205.jpg'), + call("r = requests.get('%s')", 'http://foo.bar/20231031_000405.jpg'), + call("r = requests.get('%s')", 'http://foo.bar/20231031_000605.jpg')]) diff --git a/py/desitransfer/test/test_status.py b/py/desitransfer/test/test_status.py index 5216c06..8f1a0ed 100644 --- a/py/desitransfer/test/test_status.py +++ 
b/py/desitransfer/test/test_status.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """Test desitransfer.status. """ +import importlib.resources as ir import json import os import shutil @@ -9,7 +10,6 @@ import unittest from unittest.mock import patch, call from tempfile import TemporaryDirectory -from pkg_resources import resource_filename from ..status import TransferStatus, _options @@ -45,10 +45,8 @@ def test_options(self): def test_TransferStatus_init(self): """Test status reporting mechanism setup. """ - h = resource_filename('desitransfer', - 'data/desi_transfer_status.html') - j = resource_filename('desitransfer', - 'data/desi_transfer_status.js') + h = os.path.join(str(ir.files('desitransfer')), 'data', 'desi_transfer_status.html') + j = os.path.join(str(ir.files('desitransfer')), 'data', 'desi_transfer_status.js') # # Existing empty directory. # @@ -93,7 +91,7 @@ def test_TransferStatus_init(self): def test_TransferStatus_handle_malformed_with_log(self, mock_log): """Test handling of malformed JSON files. """ - bad = resource_filename('desitransfer.test', 't/bad.json') + bad = os.path.join(str(ir.files('desitransfer.test')), 't', 'bad.json') with TemporaryDirectory() as d: shutil.copy(bad, os.path.join(d, 'desi_transfer_status_2020.json')) s = TransferStatus(d, year=2020) diff --git a/py/desitransfer/test/test_top_level.py b/py/desitransfer/test/test_top_level.py index e278abb..22588b4 100644 --- a/py/desitransfer/test/test_top_level.py +++ b/py/desitransfer/test/test_top_level.py @@ -4,7 +4,6 @@ """ import unittest import re -import sys from .. 
import __version__ as theVersion diff --git a/py/desitransfer/test/test_tucson.py b/py/desitransfer/test/test_tucson.py index 0b2b940..69180c0 100644 --- a/py/desitransfer/test/test_tucson.py +++ b/py/desitransfer/test/test_tucson.py @@ -7,8 +7,7 @@ import unittest import logging import subprocess as sub -from socket import getfqdn -from unittest.mock import patch, call, mock_open, MagicMock +from unittest.mock import patch, call, mock_open from ..tucson import _options, _rsync, _configure_log, running from .. import __version__ as dtVersion diff --git a/py/desitransfer/tucson.py b/py/desitransfer/tucson.py index 3408132..d2d22a3 100644 --- a/py/desitransfer/tucson.py +++ b/py/desitransfer/tucson.py @@ -11,7 +11,6 @@ import subprocess as sub import time from argparse import ArgumentParser -from socket import getfqdn from logging.handlers import SMTPHandler import requests from . import __version__ as dtVersion @@ -55,12 +54,14 @@ 'spectro/redux/daily/preproc', 'spectro/redux/daily/tiles', 'engineering/focalplane', - 'software/AnyConnect'] + 'software/AnyConnect', + 'software/CiscoSecureClient'] -includes = {'spectro/desi_spectro_calib': ["--exclude", ".svn"], +includes = {'engineering/focalplane': ["--exclude", "archive", "--exclude", "hwtables", "--exclude", ".ipynb_checkpoints", "--exclude", "*.ipynb"], + 'engineering/focalplane/hwtables': ["--include", "*.csv", "--exclude", "*"], + 'spectro/desi_spectro_calib': ["--exclude", ".svn"], 'spectro/data': (' '.join([f'--exclude {y:d}*' for y in range(2018, time.localtime().tm_year)])).split(), - # 'spectro/nightwatch': ["--include", "kpno/***", "--exclude", "*"], 'spectro/redux/daily': ["--exclude", "*.tmp", "--exclude", "attic", "--exclude", "exposures", "--exclude", "preproc", "--exclude", "temp", "--exclude", "tiles"], 'spectro/redux/daily/exposures': ["--exclude", "*.tmp"], 'spectro/redux/daily/preproc': ["--exclude", "*.tmp", "--exclude", "preproc-*.fits", "--exclude", "preproc-*.fits.gz"], diff --git 
a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..54a891e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[build-system] +requires = ["setuptools", + "wheel"] +build-backend = 'setuptools.build_meta' diff --git a/setup.cfg b/setup.cfg index 1cee296..a5faf6c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,113 @@ +[metadata] +name = desitransfer +version = attr: desitransfer.__version__ +author = DESI Collaboration +author_email = desi-data@desi.lbl.gov +license = BSD 3-Clause License +license_files = LICENSE.rst +url = https://github.com/desihub/desitransfer +description = DESI data transfer infrastructure. +long_description = file: README.rst +long_description_content_type = text/x-rst +edit_on_github = True +github_project = desihub/desitransfer +classifiers = + Development Status :: 5 - Production/Stable + Environment :: Console + Intended Audience :: Science/Research + License :: OSI Approved :: BSD License + Operating System :: OS Independent + Programming Language :: Python :: 3 + Topic :: Scientific/Engineering :: Astronomy + +[options] +zip_safe = True +package_dir = + =py +packages = find: +include_package_data = True +python_requires = >=3.9 +# setup_requires = setuptools_scm +install_requires = + requests + pytz +scripts = + bin/desi_checksum_audit.sh + bin/desi_common_init.sh + bin/desi_daily_transfer + bin/desi_nightwatch_init.sh + bin/desi_nightwatch_transfer + bin/desi_recover_lost+found.sh + bin/desi_spacewatch_transfer + bin/desi_spectro_data_unpack.sh + bin/desi_transfer_daemon + bin/desi_transfer_init.sh + bin/desi_transfer_statistics.sh + bin/desi_transfer_status + bin/desi_transfer_status_restore + bin/desi_tucson_transfer + bin/desi_tucson_transfer_catchup.sh + +# [options.entry_points] +# console_scripts = +# desi_daily_transfer = desitransfer.daily:main +# desi_nightwatch_transfer = desitransfer.nightwatch:main +# desi_spacewatch_transfer = desitransfer.spacewatch:main +# desi_transfer_daemon = 
desitransfer.daemon:main +# desi_transfer_status = desitransfer.status:main +# desi_tucson_transfer = desitransfer.tucson:main + +[options.extras_require] +test = + pytest +coverage = + pytest-cov + coveralls +doc = + Sphinx + sphinx-rtd-theme + +[options.package_data] +desitransfer = data/* +desitransfer.test = t/* + +[options.packages.find] +where = py +exclude = .github/* + +[coverage:run] +relative_files = True +source = + py/desitransfer +omit = + py/desitransfer/_version.py + py/desitransfer/conftest* + py/desitransfer/test/* + */desitransfer/_version.py + */desitransfer/conftest* + */desitransfer/test/* + +# [coverage:paths] +# source = +# py/desitransfer/ +# */site-packages/desitransfer/ + +[coverage:report] +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + # Don't complain about packages we have installed + except ImportError + # Don't complain if tests don't hit assertions + raise AssertionError + raise NotImplementedError + # Don't complain about script hooks + def main\(.*\): + # Ignore branches that don't pertain to this version of Python + pragma: py{ignore_python_version} + # Don't complain about IPython completion helper + def _ipython_key_completions_ + [pycodestyle] # See https://pycodestyle.readthedocs.io/en/latest/intro.html#configuration # for details of these configuration options. diff --git a/setup.py b/setup.py index 7a29b7d..061bf02 100755 --- a/setup.py +++ b/setup.py @@ -1,81 +1,85 @@ #!/usr/bin/env python # Licensed under a 3-clause BSD style license - see LICENSE.rst -# -# Standard imports -# -import glob -import os + +# NOTE: The configuration for the package, including the name, version, and +# other information are set in the setup.cfg file. 
+ import sys -# -# setuptools' sdist command ignores MANIFEST.in -# -from distutils.command.sdist import sdist as DistutilsSdist -from setuptools import setup, find_packages -import desiutil.setup as ds -# -# Begin setup -# -setup_keywords = dict() -# -# THESE SETTINGS NEED TO BE CHANGED FOR EVERY PRODUCT. -# -setup_keywords['name'] = 'desitransfer' -setup_keywords['description'] = 'DESI data transfer infrastructure.' -setup_keywords['author'] = 'DESI Collaboration' -setup_keywords['author_email'] = 'desi-data@desi.lbl.gov' -setup_keywords['license'] = 'BSD' -setup_keywords['url'] = 'https://github.com/desihub/desitransfer' -# -# END OF SETTINGS THAT NEED TO BE CHANGED. -# -setup_keywords['version'] = ds.get_version(setup_keywords['name']) -# -# Use README.rst as long_description. -# -setup_keywords['long_description'] = '' -if os.path.exists('README.rst'): - with open('README.rst') as readme: - setup_keywords['long_description'] = readme.read() -# -# Set other keywords for the setup function. These are automated, & should -# be left alone unless you are an expert. -# -# Treat everything in bin/ except *.rst as a script to be installed. 
-# -if os.path.isdir('bin'): - setup_keywords['scripts'] = [fname for fname in glob.glob(os.path.join('bin', '*')) - if not os.path.basename(fname).endswith('.rst')] -setup_keywords['provides'] = [setup_keywords['name']] -setup_keywords['python_requires'] = '>=3.5' -setup_keywords['zip_safe'] = False -setup_keywords['use_2to3'] = False -setup_keywords['packages'] = find_packages('py') -setup_keywords['package_dir'] = {'': 'py'} -setup_keywords['cmdclass'] = {'module_file': ds.DesiModule, - 'version': ds.DesiVersion, - 'test': ds.DesiTest, - # 'api': ds.DesiAPI, - 'sdist': DistutilsSdist} -setup_keywords['test_suite']='{name}.test.{name}_test_suite'.format(**setup_keywords) -setup_keywords['classifiers'] = ['Development Status :: 4 - Beta', - 'Environment :: Console', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering :: Astronomy'] -# -# Autogenerate command-line scripts. -# -# setup_keywords['entry_points'] = {'console_scripts':['desi_daily_transfer = desitransfer.daily:main', -# 'desi_transfer_daemon = desitransfer.daemon:main', -# 'desi_transfer_status = desitransfer.status:main']} -# -# Add internal data directories. -# -setup_keywords['package_data'] = {'desitransfer': ['data/*'], - 'desitransfer.test': ['t/*']} -# -# Run setup command. -# -setup(**setup_keywords) +from setuptools import setup + +# First provide helpful messages if contributors try and run legacy commands +# for tests or docs. + +API_HELP = """ +Note: Generating api.rst files is no longer done using 'python setup.py api'. Instead +you will need to run: + + desi_api_file + +which is part of the desiutil package. If you don't already have desiutil installed, you can install it with: + + pip install desiutil +""" + +MODULE_HELP = """ +Note: Generating Module files is no longer done using 'python setup.py module_file'. 
Instead +you will need to run: + + desiInstall + +or + + desi_module_file + +depending on your exact situation. desiInstall is preferred. Both commands are +part of the desiutil package. If you don't already have desiutil installed, you can install it with: + + pip install desiutil +""" + +VERSION_HELP = """ +Note: Generating version strings is no longer done using 'python setup.py version'. Instead +you will need to run: + + desi_update_version [-t TAG] desitransfer + +which is part of the desiutil package. If you don't already have desiutil installed, you can install it with: + + pip install desiutil +""" + +TEST_HELP = """ +Note: running tests is no longer done using 'python setup.py test'. Instead +you will need to run: + + pytest + +If you don't already have pytest installed, you can install it with: + + pip install pytest +""" + +DOCS_HELP = """ +Note: building the documentation is no longer done using +'python setup.py {0}'. Instead you will need to run: + + sphinx-build -W --keep-going -b html doc doc/_build/html + +If you don't already have Sphinx installed, you can install it with: + + pip install Sphinx +""" + +message = {'api': API_HELP, + 'module_file': MODULE_HELP, + 'test': TEST_HELP, + 'version': VERSION_HELP, + 'build_docs': DOCS_HELP.format('build_docs'), + 'build_sphinx': DOCS_HELP.format('build_sphinx'), } + +for m in message: + if m in sys.argv: + print(message[m]) + sys.exit(1) + +setup()