Skip to content

Commit

Permalink
add spacewatch code
Browse files Browse the repository at this point in the history
  • Loading branch information
weaverba137 committed Oct 31, 2023
1 parent 35a5a90 commit 3c8bef0
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 12 deletions.
118 changes: 117 additions & 1 deletion py/desitransfer/spacewatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,120 @@
desitransfer.spacewatch
=======================
Entry point for :command:`desi_spacewatch_transfer`.
Download Spacewatch data from a server at KPNO.
Notes
-----
* Spacewatch data rolls over at 00:00 UTC = 17:00 MST.
* The data relevant to the previous night, say 20231030, would be downloaded
on the morning of 20231031.
* Therefore to obtain all data of interest, just download the files that
have already appeared in 2023/10/31/ (Spacewatch directory structure)
the morning after DESI night 20231030.
"""
import datetime
import os
import re
from argparse import ArgumentParser
from html.parser import HTMLParser
try:
utc = datetime.UTC
except AttributeError:
# datetime.UTC was added in Python 3.11; older versions fall back to pytz.
import pytz
utc = pytz.UTC
import requests
from . import __version__ as dtVersion


class SpacewatchHTMLParser(HTMLParser):
    """Extract JPG files from an HTML index.

    After :meth:`feed` has been called, :attr:`jpg_files` holds the ``href``
    values of every anchor tag whose target starts with a
    ``YYYYMMDD_HHMMSS.jpg`` style file name, in document order.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Matches names such as 20231031_000005.jpg at the start of the href.
        self.jpg_re = re.compile(r'[0-9]{8}_[0-9]{6}\.jpg')
        self.jpg_files = list()

    def handle_starttag(self, tag, attrs):
        """Process HTML tags, in this case targeting anchor tags.

        Parameters
        ----------
        tag : :class:`str`
            The lower-cased tag name.
        attrs : :class:`list`
            A list of ``(name, value)`` pairs; ``value`` is ``None`` for
            an attribute written without a value (*e.g.* ``<a href>``).
        """
        if tag != 'a':
            return
        for name, value in attrs:
            if name == 'href':
                # A valueless href is reported as None by HTMLParser; passing
                # that to the regular expression would raise TypeError.
                if value is not None and self.jpg_re.match(value) is not None:
                    self.jpg_files.append(value)
                # Only the first href attribute of a tag is considered.
                break


def _content_charset(content_type, default='ISO-8859-1'):
    """Return the ``charset`` parameter of a ``Content-Type`` header value.

    Parameters
    ----------
    content_type : :class:`str`
        The raw header value, *e.g.* ``text/html; charset=UTF-8``.
    default : :class:`str`, optional
        Value returned when no usable ``charset`` parameter is present.

    Returns
    -------
    :class:`str`
        The character set name, with surrounding quotes removed.
    """
    # The first element is the media type itself; parameters follow.
    for parameter in content_type.split(';')[1:]:
        name, _, value = parameter.partition('=')
        if name.strip().lower() == 'charset':
            value = value.strip().strip('"\'')
            if value:
                return value
    return default


def jpg_list(index):
    """Obtain a list of JPEG files from an HTML index.

    Parameters
    ----------
    index : :class:`str`
        The URL of an HTML index.

    Returns
    -------
    :class:`list`
        A list of JPEG files found in `index`. The `index` URL is attached
        to the file names. The list is empty if the server does not
        respond with status 200.
    """
    r = requests.get(index)
    parser = SpacewatchHTMLParser()
    if r.status_code == 200:
        # The header may be absent, may lack a charset, or may carry
        # additional parameters, so it cannot simply be split on '='.
        charset = _content_charset(r.headers.get('Content-Type', ''))
        parser.feed(r.content.decode(charset))
    return [index + j for j in parser.jpg_files]


def download_jpg(files, destination):
    """Download `files` to `destination`.

    Files that already exist in `destination` are skipped, as are URLs
    that do not return status 200. When the server supplies a parseable
    ``Last-Modified`` header, the modification time of the local file is
    set to match it; otherwise the file keeps the time it was written.

    Parameters
    ----------
    files : :class:`list`
        A list of URLs to download.
    destination : :class:`str`
        A local directory to hold the files.

    Returns
    -------
    :class:`int`
        The number of files downloaded.
    """
    # Locale-independent parser for HTTP (RFC 5322 style) dates; strptime
    # with %a/%b depends on the process locale.
    from email.utils import parsedate_to_datetime

    downloaded = 0
    for jpg in files:
        base_jpg = jpg.split('/')[-1]
        dst_jpg = os.path.join(destination, base_jpg)
        if os.path.exists(dst_jpg):
            # Existing files are never overwritten.
            continue
        r = requests.get(jpg)
        if r.status_code != 200:
            continue
        with open(dst_jpg, 'wb') as j:
            j.write(r.content)
        # Count the file only once it is actually on disk.
        downloaded += 1
        last_modified = r.headers.get('Last-Modified')
        if last_modified is None:
            continue
        try:
            modified = parsedate_to_datetime(last_modified)
        except (TypeError, ValueError):
            # Unparseable date: keep the file, leave its mtime alone.
            continue
        if modified.tzinfo is None:
            # HTTP dates are always GMT; treat a naive result as UTC.
            modified = modified.replace(tzinfo=datetime.timezone.utc)
        timestamp = int(modified.timestamp())
        os.utime(dst_jpg, (timestamp, timestamp))
    return downloaded


def _options():
    """Parse command-line options for :command:`desi_spacewatch_transfer`.

    Returns
    -------
    :class:`argparse.Namespace`
        The parsed command-line options: ``debug`` (boolean flag) and
        ``destination`` (the download directory).
    """
    parser = ArgumentParser(description="Transfer Spacewatch data files.")
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        help='Set log level to DEBUG.')
    version_string = '%(prog)s {0}'.format(dtVersion)
    parser.add_argument('-V', '--version',
                        action='version',
                        version=version_string)
    parser.add_argument('destination',
                        metavar='DIR',
                        help='Download files to DIR.')
    return parser.parse_args()


def main():
Expand All @@ -16,4 +128,8 @@ def main():
:class:`int`
An integer suitable for passing to :func:`sys.exit`.
"""
options = _options()
spacewatch_root = 'https://varuna.kpno.noirlab.edu/allsky-all/images/cropped/'
spacewatch_today = spacewatch_root + datetime.date.today().strftime("%Y/%m/%d/")
n_files = download_jpg(jpg_list(spacewatch_today), options.destination)
return 0
127 changes: 116 additions & 11 deletions py/desitransfer/test/test_spacewatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import sys
import unittest
from tempfile import TemporaryDirectory
from unittest.mock import call, patch
from ..spacewatch import (main, )
from unittest.mock import call, patch, Mock
from ..spacewatch import (_options, jpg_list, download_jpg)


class TestSpacewatch(unittest.TestCase):
Expand All @@ -33,15 +33,120 @@ def tearDown(self):
"""
self.tmp.cleanup()

# def test_options(self):
# """Test command-line arguments.
# """
# with patch.object(sys, 'argv', ['desi_nightwatch_transfer', '--debug']):
# options = _options()
# self.assertTrue(options.debug)
# self.assertEqual(options.kill,
# os.path.join(os.environ['HOME'],
# 'stop_desi_transfer'))
def test_options(self):
    """Test command-line arguments.

    Checks both the ``--debug`` flag and the positional download
    directory supplied on the simulated command line.
    """
    destination = '/desi/external/spacewatch'
    with patch.object(sys, 'argv', ['desi_spacewatch_transfer', '--debug', destination]):
        options = _options()
    self.assertTrue(options.debug)
    # The positional DIR argument must come back unchanged.
    self.assertEqual(options.destination, destination)

@patch('desitransfer.spacewatch.requests')
def test_jpg_files(self, mock_requests):
    """Test parsing an index.html file.

    A mocked response carrying a directory listing with four JPEG
    links is returned by ``requests.get``; only those four links,
    prefixed with the index URL, should be reported.
    """
    index_html = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /allsky-all/images/cropped/2023/10/31</title>
</head>
<body>
<h1>Index of /allsky-all/images/cropped/2023/10/31</h1>
<table>
<tr>
<th valign="top">
<img src="/icons/blank.gif" alt="[ICO]">
</th>
<th>
<a href="?C=N;O=D">Name</a>
</th>
<th>
<a href="?C=M;O=A">Last modified</a>
</th>
<th>
<a href="?C=S;O=A">Size</a>
</th>
<th>
<a href="?C=D;O=A">Description</a>
</th>
</tr>
<tr>
<th colspan="5">
<hr>
</th>
</tr>
<tr>
<td valign="top">
<img src="/icons/back.gif" alt="[PARENTDIR]">
</td>
<td>
<a href="/allsky-all/images/cropped/2023/10/">Parent Directory</a>
</td>
<td>&nbsp;</td>
<td align="right"> - </td>
<td>&nbsp;</td>
</tr>
<tr>
<td valign="top">
<img src="/icons/image2.gif" alt="[IMG]">
</td>
<td>
<a href="20231031_000005.jpg">20231031_000005.jpg</a>
</td>
<td align="right">2023-10-31 00:00 </td>
<td align="right">142K</td>
<td>&nbsp;</td>
</tr>
<tr>
<td valign="top">
<img src="/icons/image2.gif" alt="[IMG]">
</td>
<td>
<a href="20231031_000205.jpg">20231031_000205.jpg</a>
</td>
<td align="right">2023-10-31 00:02 </td>
<td align="right">143K</td>
<td>&nbsp;</td>
</tr>
<tr>
<td valign="top">
<img src="/icons/image2.gif" alt="[IMG]">
</td>
<td>
<a href="20231031_000405.jpg">20231031_000405.jpg</a>
</td>
<td align="right">2023-10-31 00:04 </td>
<td align="right">138K</td>
<td>&nbsp;</td>
</tr>
<tr>
<td valign="top">
<img src="/icons/image2.gif" alt="[IMG]">
</td>
<td>
<a href="20231031_000605.jpg">20231031_000605.jpg</a>
</td>
<td align="right">2023-10-31 00:06 </td>
<td align="right">142K</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="5">
<hr>
</th>
</tr>
</table>
</body>
</html>"""
    # Stand-in for the object returned by requests.get().
    response = Mock()
    response.status_code = 200
    response.headers = {'Content-Type': 'text/html;charset=ISO-8859-1'}
    response.content = index_html.encode('ISO-8859-1')
    mock_requests.get.return_value = response

    found = jpg_list('http://foo.bar/')

    mock_requests.get.assert_called_once_with('http://foo.bar/')
    expected = ['http://foo.bar/20231031_000005.jpg',
                'http://foo.bar/20231031_000205.jpg',
                'http://foo.bar/20231031_000405.jpg',
                'http://foo.bar/20231031_000605.jpg']
    self.assertListEqual(found, expected)

# @patch('desitransfer.nightwatch.SMTPHandler')
# @patch('desitransfer.nightwatch.RotatingFileHandler')
Expand Down

0 comments on commit 3c8bef0

Please sign in to comment.