This repository has been archived by the owner on Nov 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathgenerate_metadata.py
72 lines (58 loc) · 1.84 KB
/
generate_metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
# Description: generates the JSON files (one per page) that contain the item data necessary for the UI
# Example usage:
# python generate_metadata.py ../data/src/pd_items.json ../js/items/ 5
import json
import math
from pprint import pprint
import re
import sys
# Script body. Written to run unchanged on Python 2.7 and Python 3: no
# f-strings, and print() is always called with one parenthesised argument.
import io


def build_item_row(item):
    """Return ``[uuid, title, captureId]`` for one parsed item record.

    item -- dict decoded from one line of the input file.

    Every field falls back to an empty string when the corresponding key is
    missing, null or empty.
    """
    # Retrieve capture id of item's first capture
    capture_id = ""
    capture_ids = item.get("captureIds")
    if capture_ids:
        capture_id = capture_ids[0].strip()

    # Retrieve UUID: prefer the capture whose imageId matches the first
    # capture id, otherwise fall back to the first capture in the list.
    uuid = ""
    captures = item.get("captures")
    if captures:
        capture = None
        if capture_id:
            capture = next(
                (c for c in captures if c.get("imageId") == capture_id),
                None,
            )
        if not capture:
            capture = captures[0]
        uuid = capture["uuid"].strip()

    # Retrieve title. It is kept as text instead of being UTF-8 encoded:
    # json.dump cannot serialize bytes on Python 3, and on Python 2 it writes
    # the same escaped output for text as for UTF-8 encoded bytes.
    # strip() on text also removes non-ASCII whitespace such as U+00A0.
    title = ""
    if item.get("title"):
        title = item["title"].strip()

    return [uuid, title, capture_id]


def read_items(input_file):
    """Read *input_file* (one JSON document per line) into a list of rows.

    Blank lines are skipped. The file is decoded as UTF-8 and is closed
    before the function returns.
    """
    rows = []
    with io.open(input_file, "r", encoding="utf-8") as infile:
        for line in infile:
            if not line.strip():
                # A trailing empty line would otherwise make json.loads raise.
                continue
            rows.append(build_item_row(json.loads(line)))
    return rows


def split_into_groups(items, file_count):
    """Split *items* into exactly *file_count* consecutive groups.

    Each group holds ceil(len(items) / file_count) entries, except the last
    one, which takes whatever remains (possibly nothing).
    *file_count* must be at least 1.
    """
    group_size = int(math.ceil(1.0 * len(items) / file_count))
    groups = []
    for i in range(file_count):
        start = i * group_size
        if i >= file_count - 1:
            groups.append(items[start:])
        else:
            groups.append(items[start:start + group_size])
    return groups


def write_groups(groups, output_dir):
    """Write each group to ``<output_dir>items_<page>_<count>.json``.

    *output_dir* is used as a plain string prefix, so it needs its own
    trailing path separator.
    """
    file_count = len(groups)
    for page, group in enumerate(groups):
        file_name = output_dir + 'items_' + str(page) + '_' + str(file_count) + '.json'
        with open(file_name, 'w') as outfile:
            json.dump({'page': page, 'items': group}, outfile)
        print("Wrote " + str(len(group)) + " lines to " + file_name)


def main(argv):
    """Run the script with *argv* (same layout as sys.argv).

    Returns the process exit status: 0 on success, 1 on bad arguments.
    """
    # Three arguments are required: argv[3] is read below.
    if len(argv) < 4:
        print("Usage: %s <inputfile items json> <outputfile item captures json> <number of files>" % argv[0])
        return 1

    input_file = argv[1]
    output_dir = argv[2]
    file_count = int(argv[3])
    if file_count < 1:
        # Zero would divide by zero when computing the group size.
        print("Number of files must be at least 1, got %s" % file_count)
        return 1

    items = read_items(input_file)
    write_groups(split_into_groups(items, file_count), output_dir)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))