-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest_httpdir.py
54 lines (43 loc) · 1.63 KB
/
ingest_httpdir.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
"""Ingest HTTP Directory
Traverses an HTTP directory tree as presented with a JSON autoindex (Nginx),
starting from a given folder URL.
Usage:
ingest_httpdir.py --url=URL --dest=path [--quiet | --verbose]
ingest_httpdir.py -h | --help
Options:
--url=URL Base folder URL to begin traverse (HTTP Index as JSON)
--verbose Increase logging output to DEBUG level.
--quiet Decrease logging output to WARNING level.
-h, --help Show this message.
"""
import logging
from jobs.httpdir import batch_ingest_httpdir
from docopt import docopt
if __name__ == '__main__':
    # Script entry point: parse the command line described by the module
    # docstring, configure logging, validate the two required options and
    # queue a single ingest job for the given folder URL.
    arguments = docopt(__doc__, version='Ingest HTTP Directory v1.0')

    # Dedicated logger with a stream handler (writes to stderr by default).
    logger = logging.getLogger("ingest_httpdir")
    logger.addHandler(logging.StreamHandler())

    # --verbose wins over --quiet if both are somehow set; INFO otherwise.
    if arguments['--verbose']:
        log_level = logging.DEBUG
    elif arguments['--quiet']:
        log_level = logging.WARNING
    else:
        log_level = logging.INFO
    logger.setLevel(log_level)

    # Dump the parsed arguments only at DEBUG level, so that --quiet (and the
    # default level) no longer print the raw argument dict.
    logger.debug('Parsed arguments: %s', arguments)

    url = arguments["--url"]
    dest = arguments["--dest"]

    # Both values must look like folders, i.e. end with a slash. Note that
    # only the trailing slash is checked here; whether the destination
    # actually exists is not verified by this script.
    #
    # SystemExit is raised directly instead of calling the exit() helper:
    # exit() is injected by the site module for interactive use and is not
    # guaranteed to exist (e.g. under ``python -S``).
    if not url.endswith('/'):
        logger.error("URL must be an HTTP folder URL, ending in /")
        raise SystemExit(1)
    if not dest.endswith('/'):
        logger.error("Destination path must be an existing folder path, ending in /")
        raise SystemExit(1)

    logger.info('Instructing workers to ingest: %s', url)

    # Queue the traverse job for the URL.
    # NOTE(review): .s(...).apply_async() looks like a Celery task signature
    # dispatched asynchronously; result.id / result.info are then the task id
    # and its current state info -- confirm against jobs.httpdir.
    result = batch_ingest_httpdir.s(url=url, dest=dest).apply_async()
    print('Ingest task ID: {0}\n{1}'.format(result.id, result.info))

    # Explicit successful exit, mirroring the original script's exit(0).
    raise SystemExit(0)