-
Notifications
You must be signed in to change notification settings - Fork 2
/
store_images.py
106 lines (82 loc) · 3.51 KB
/
store_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
'''
Fetches images from Wikipedia (Wikimedia Commons), then resizes and compresses
them for use in the app.
'''
import io
import logging
import multiprocessing
import os.path
import signal
import PIL
import fetcher
import progress
from images import Image
from species import Species, SelectedSpecies
# Fetcher used by _process_image; created lazily on the first call in whichever
# process runs it, then reused for subsequent images in that process.
_fetcher = None
def _process_image(image):
    '''
    Entry point for parallel processing.

    Fetches the source picture of `image`, scales it down (keeping the aspect
    ratio) so that neither edge exceeds `_args.image_size`, and writes it as WebP
    to `_args.image_output_dir`. An output file that already exists is left
    untouched unless `_args.recreate_images` is set.

    Returns `image.output_file_name` in every case, so that the caller can tell
    which files in the output directory are still in use.
    '''
    global _fetcher # pylint: disable=global-statement

    # A bare `import PIL` does not load the `PIL.Image` submodule; import it
    # explicitly instead of relying on some other module having done so.
    import PIL.Image # pylint: disable=import-outside-toplevel

    if not _fetcher:
        _fetcher = fetcher.Fetcher('wp_images', pool_size=1)

    full_output_file_name = os.path.join(_args.image_output_dir, image.output_file_name)
    if os.path.exists(full_output_file_name) and not _args.recreate_images:
        return image.output_file_name

    image_data = _fetcher.fetch_cached(image.image_file_url)
    pil_image = PIL.Image.open(io.BytesIO(image_data))

    if pil_image.width > _args.image_size or pil_image.height > _args.image_size:
        # Pin the longest edge to image_size and derive the other edge from the
        # aspect ratio. Clamp to 1 so that an extremely elongated image cannot
        # round down to a zero-sized edge, which resize() would reject.
        if pil_image.width >= pil_image.height:
            output_width = _args.image_size
            output_height = max(1, round(output_width / pil_image.width * pil_image.height))
        else:
            output_height = _args.image_size
            output_width = max(1, round(output_height / pil_image.height * pil_image.width))
        pil_image = pil_image.resize((output_width, output_height), resample=PIL.Image.LANCZOS)

    pil_image.save(full_output_file_name,
                   format='WebP', quality=_args.image_quality)
    return image.output_file_name
# Parsed command line arguments; assigned by main() and read by _process_image().
# NOTE(review): worker processes presumably see this value through inheritance
# from the parent process -- confirm for the start method in use.
_args = None
def add_args(parser):
    '''
    Registers the command line options of this module on `parser`.

    The numeric options are declared with `type=int` so that values given on
    the command line have the same type as the defaults; without it they arrive
    as strings and break the size comparison and the WebP quality setting.
    '''
    parser.add_argument(
        '--image_output_dir',
        default=os.path.join(os.path.dirname(__file__), '..', 'app', 'assets', 'images'),
        help='Target directory for resized and compressed images')
    parser.add_argument(
        '--image_process_jobs', type=int, default=8,
        help='Parallelism for fetching and resizing images')
    parser.add_argument(
        '--recreate_images', action='store_true',
        help='Do not assume that existing image files on disk are up to date; create them anew')
    parser.add_argument(
        '--image_size', type=int, default=768,
        help='Maximum size in pixels of bird photos measured along the longest edge')
    parser.add_argument(
        '--image_quality', type=int, default=60,
        help='WebP quality level of bird photos')
def _init_worker(args):
    '''
    Pool initializer: makes the parsed arguments available to _process_image
    inside a worker process, independent of how the worker was started.
    '''
    global _args # pylint: disable=global-statement
    _args = args


def main(args, session):
    '''
    Creates the resized images for all selected species and removes stale ones.

    Queries `session` for the image records of the selected species, processes
    them in a pool of `args.image_process_jobs` worker processes, and finally
    deletes every file in `args.image_output_dir` that no processed image
    reported as its output file.
    '''
    global _args # pylint: disable=global-statement
    _args = args

    logging.info('Fetching image records for selected species')
    images = session.query(Image)\
        .join(Species, Species.species_id == Image.species_id)\
        .join(SelectedSpecies)\
        .all()

    logging.info('Listing existing images')
    old_images = set(os.listdir(args.image_output_dir))

    logging.info('Resizing images')
    # Ignore SIGINT while the workers are being created so that they start out
    # ignoring it, then restore the handler in the parent so that Ctrl+C is
    # handled here and the pool is torn down by the `with` block.
    # https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python#35134329
    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    try:
        # multiprocessing.Pool is the documented entry point; the
        # `multiprocessing.pool` submodule is not loaded by `import multiprocessing`.
        pool = multiprocessing.Pool(
            args.image_process_jobs, initializer=_init_worker, initargs=(args,))
    finally:
        # Restore the handler even if the pool could not be created.
        signal.signal(signal.SIGINT, original_sigint_handler)

    with pool:
        for image_file_name in progress.percent(
                pool.imap(_process_image, images),
                len(images)):
            if image_file_name:
                # Still in use, so it must survive the cleanup below.
                old_images.discard(image_file_name)

    logging.info('Deleting %d old images', len(old_images))
    for old_image in old_images:
        try:
            os.remove(os.path.join(args.image_output_dir, old_image))
        except OSError as ex:
            # Best effort: a leftover file is not worth aborting the run for.
            logging.warning('Could not delete %s: %s', old_image, ex)