-
Notifications
You must be signed in to change notification settings - Fork 0
/
prep_iiif.py
249 lines (214 loc) · 9.46 KB
/
prep_iiif.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""
prep_iiif.py - create zipped tiles with cache for byte range requests
Usage (see list of options):
prep_iiif.py [-h]
This script walks through a directory of images and creates two new
directories with the same folder structure. Each image will have a
corresponding tiles.zip file in one of the new paths based on the image
file name. The other path will have the zipped file's dir structure
in a small binary file. The idea is that the tiles folders will be
stored on web storage but an intermediate process on a web server
will optionally use the dir file to look up a requested tile and
then obtain it with a byte range request.
The purpose of this approach is to avoid a gazillion files on a web
storage service for serving tiles while, at the same time, not
requiring an image server to carve up tiles dynamically. The process
is described in some detail here:
https://github.com/OurDigitalWorld/iiif_zipped
This is a work-in-progress but the hope is to find a low-cost solution
for resource constrained server environments. The resulting tiles.zip
files do not use compression (since we want to minimize on the work
that the web server performs), but they do gain efficiencies from
avoiding the sometimes astounding number of nested directories in
a typical IIIF rendering.
- art rhyno, u. of windsor & ourdigitalworld
"""
import bitstring
import glob, os, re, sys, tempfile
import json
import optparse
from pathlib import Path
from PIL import Image
from subprocess import call
import struct
import zipfile
RESIZE = 1.5 # upscale multiplier applied before tiling; set below 1.0 to downsize instead
IIIF_STATIC = "./iiif_static.py" # external tile-cutting script, see https://github.com/zimeon/iiif
IIIF_OPTS = "-e '/full/90,/0/default.jpg' -e '/full/200,/0/default.jpg'" # extra derivative sizes passed to iiif_static.py; add more -e options here
ZIP_MARKER = "0x504b0506" # bitstring hex pattern for b"PK\x05\x06" - the zip End Of Central Directory signature
""" sort_out_json - take incoming image info and finalize IIIF manifest """
def sort_out_json(out_folder, obj_folder, imgs, json_imgs):
    """Write a bare-bones IIIF Presentation 2.x manifest for the image set.

    out_folder -- directory that receives manifest.json
    obj_folder -- identifier prefix used for the manifest @id
    imgs       -- ordered list of image identifiers (first/last feed structures)
    json_imgs  -- pre-built sc:Canvas dictionaries for the sequence
    """
    last_idx = len(imgs) - 1  # last page index, derived from the image count

    def _page_range(idx, label):
        # one sc:Range entry pointing at a single canvas (1-based page number)
        pg = str(idx + 1)
        return {
            "@id": imgs[idx] + "/ranges/" + pg,
            "@type": "sc:Range",
            "label": label,
            "canvases": [
                imgs[idx] + "/canvas/" + pg
            ],
            "within": ""
        }

    manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@type": "sc:Manifest",
        "@id": obj_folder + "/manifest.json",
        "label": "",
        "description": "",
        "logo": "",
        "sequences": [
            {
                "@type": "sc:Sequence",
                "canvases": json_imgs
            }
        ],
        # only front and last page are exposed as named ranges
        "structures": [
            _page_range(0, "Front Page"),
            _page_range(last_idx, "Last Page")
        ]
    }
    with open(out_folder + "/manifest.json", "w") as outfile:
        outfile.write(json.dumps(manifest, indent=4))
""" resize_by_mult - use multiple to resize image """
def resize_by_mult(image, mult):
    """Resize the image at path *image* by multiplier *mult*.

    Returns (width, height, resized_image). The resized PIL image lives in
    memory and is safe to use after the source file handle is closed,
    because resize() forces the pixel data to load.
    Raises PIL's usual exceptions if the file cannot be opened or decoded.
    (The original trailing "return 0, 0, None" was unreachable dead code
    and has been removed.)
    """
    with Image.open(image) as im:
        width, height = im.size
        # int() truncation matches the original tile dimensions exactly
        resized = im.resize((int(width * mult), int(height * mult)))
        width, height = resized.size
        return width, height, resized
""" zipdir - add to zip archive, put files in tiles folder """
def zipdir(path, ziph):
    """Add every file under *path* to the open ZipFile *ziph*.

    Each archive member keeps its on-disk layout, but the leading *path*
    prefix is swapped for a top-level "tiles" folder inside the zip.
    """
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            src = os.path.join(dirpath, name)
            # e.g. /tmp/xyz/0/default.jpg -> tiles/0/default.jpg
            ziph.write(src, src.replace(path, "tiles"))
""" sort_out_zipdir - extract zip directory from archive """
def sort_out_zipdir(dir_loc, zip_file, dir_file):
    """Extract the zip central directory of *zip_file* into *dir_file*.

    The web layer can later resolve a requested tile to a byte range using
    this small binary index instead of opening the whole archive.

    dir_loc  -- directory (created if needed) that will hold *dir_file*
    zip_file -- path to the tiles archive
    dir_file -- output path for the raw central-directory bytes

    Raises ValueError if no End Of Central Directory record is found.

    Rewritten with the stdlib only (struct instead of bitstring); the
    original also leaked its second file handle, fixed here with "with".
    """
    Path(dir_loc).mkdir(parents=True, exist_ok=True)  # create folder structure
    with open(zip_file, "rb") as zfile:
        data = zfile.read()
    # End Of Central Directory signature is b"PK\x05\x06" (ZIP_MARKER).
    # Search from the end: the EOCD is the last structure in a zip, and a
    # reverse search cannot be fooled by stored tile bytes that happen to
    # contain the signature (a forward find could be).
    eocd = data.rfind(b"PK\x05\x06")
    if eocd < 0:
        raise ValueError("no end-of-central-directory record in %s" % zip_file)
    # EOCD layout: sig(4) disk(2) cd_disk(2) n_this_disk(2) n_total(2)
    # cd_size(4) cd_offset(4) -> the two fields we need start at +12
    cd_size, cd_offset = struct.unpack_from("<LL", data, eocd + 12)
    with open(dir_file, "wb") as f:
        f.write(data[cd_offset:cd_offset + cd_size])  # write out zip dir
""" sort_out_zip - pull together archive """
def sort_out_zip(ofolder, identifier, temp_dir):
    """Package freshly cut tiles from *temp_dir* into cloud/cache outputs.

    Creates ofolder + "/cloud" + identifier + "/tiles.zip" (uncompressed
    tile archive destined for web storage) and ofolder + "/cache" +
    identifier + "/dir.bin" (the zip's central directory for byte-range
    lookups). Quietly does nothing when the tiling step produced no
    info.json, or when that file is empty - preserving the original
    best-effort behaviour.
    """
    info_json = temp_dir + "/info.json"
    if not os.path.exists(info_json):  # info.json is created by tiles process
        return
    # rewrite temp-dir paths inside info.json to the public identifier
    with open(info_json, 'r') as info_json_file:
        contents = info_json_file.read().replace(temp_dir, identifier)
    if len(contents) == 0:
        return
    zip_cloud_loc = ofolder + "/cloud" + identifier
    zip_cache_loc = ofolder + "/cache" + identifier
    zip_file = zip_cloud_loc + '/tiles.zip'
    dir_file = zip_cache_loc + '/dir.bin'
    with open(info_json, "w") as info_json_file:
        info_json_file.write(contents)
    Path(zip_cloud_loc).mkdir(parents=True, exist_ok=True)
    # ZIP_STORED: no compression, so the web layer can serve raw byte ranges
    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_STORED,
                         allowZip64=False, compresslevel=None) as zipf:
        zipdir(temp_dir, zipf)
    if os.path.exists(zip_file):
        sort_out_zipdir(zip_cache_loc, zip_file, dir_file)
""" proc_image_folder - build image collection into IIIF layout """
def proc_image_folder(iroot, ifolder, ofolder):
    """Tile every image in *ifolder* and write an IIIF manifest for the set.

    iroot   -- root prefix stripped from paths to build public identifiers
    ifolder -- directory of images (every entry is assumed to be an image)
    ofolder -- destination; "cloud" (zips) and "cache" (dir files) live here

    Returns True on completion, including when the folder is empty.
    Fix: the original raised NameError (unbound img_folder) and would have
    hit IndexError in sort_out_json when ifolder contained no files.
    """
    img_path = ifolder.replace(iroot, "")
    img_list = sorted(glob.glob(ifolder + '/*'))  # assuming everything in directory is image
    if not img_list:
        # nothing to tile; bail out before the manifest step references
        # variables that are only bound inside the loop
        return True
    imgs = []
    json_imgs = []
    pg_no = 1
    for img in img_list:
        img_bits = os.path.splitext(img.replace(ifolder + "/", ""))
        w, h, target_img = resize_by_mult(img, RESIZE)  # TODO: make resize optional
        tf = tempfile.NamedTemporaryFile()
        temp_file_name = tf.name
        target_img.save(temp_file_name, "JPEG")  # save resized file in JPEG
        # cloud directory is what will hold zips destined for web storage
        img_folder = ofolder + "/cloud" + img_path
        identifier = img_path + "/" + img_bits[0]
        imgs.append(identifier)
        if not os.path.exists(img_folder):
            Path(img_folder).mkdir(parents=True, exist_ok=True)
        if not os.path.exists(ofolder + "/cloud" + identifier):
            # dir='' puts the scratch directory in the current working dir
            td = tempfile.TemporaryDirectory(dir='')
            # we run IIIF tile cutting as shell process - this can be slow.
            # NOTE(review): shell=True with interpolated paths is injection-prone
            # if folder names are untrusted - consider subprocess.run([...])
            cmd_line = "python %s -i '%s' " % (IIIF_STATIC, td.name)
            cmd_line += IIIF_OPTS
            cmd_line += (" -d '.' %s" % temp_file_name)
            call(cmd_line, shell=True)
            sort_out_zip(ofolder, identifier, td.name)
            td.cleanup()  # we don't keep resulting tiles anywhere but archive
        # add image info in bare-bones IIIF format
        json_imgs.append({ "@type": "sc:Canvas",
            "@id": identifier + "/canvas/" + str(pg_no),
            "label": "Pg. " + str(pg_no),
            "width": w,
            "height": h,
            "images": [
                {
                    "@type": "oa:Annotation",
                    "motivation": "sc:painting",
                    "on": identifier + "/canvas/" + str(pg_no),
                    "resource": {
                        "@type": "dctypes:Image",
                        "@id": identifier + "/full/103,/0/default.jpg",
                        "service": {
                            "@context": "http://iiif.io/api/image/2/context.json",
                            "@id": identifier,
                            "profile": "http://iiif.io/api/image/2/level2.json"
                        }
                    }
                }
            ]})
        pg_no += 1
        tf.close()  # NamedTemporaryFile is removed from disk on close
    sort_out_json(img_folder, img_path, imgs, json_imgs)  # images are ready to pass to manifest
    return True
# command-line entry: walk root/folder/subfolder and process each leaf
parser = optparse.OptionParser(description='Process image files for cnode',
    usage='usage: %prog [options] folder (-h for help)')
parser.add_option('--dst', '-d', action='store', default='',
    help="Destination directory for output")
parser.add_option('--folder', '-f', action='store', default='',
    help="Input directory")
(opts, _args) = parser.parse_args()
# both the input folder and destination are required
if not opts.folder or not opts.dst:
    print("missing directory information, exiting...")
    quit()
for root_dir in glob.glob(opts.folder):
    print("root ->", root_dir)
    for folder_dir in sorted(glob.glob(root_dir + '/*')):
        print("folder -->", folder_dir)
        for leaf_dir in sorted(glob.glob(folder_dir + '/*')):
            print("subfolder --->", leaf_dir)
            if not proc_image_folder(root_dir, leaf_dir, opts.dst):
                print("problem!")
                quit()