-
Notifications
You must be signed in to change notification settings - Fork 5
/
__init__.py
192 lines (149 loc) · 7.17 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import numpy as np
import cv2
import os
# Pick whichever "load unchanged" imread flag this cv2 build exposes:
# prefer `CV_LOAD_IMAGE_UNCHANGED` and fall back to `IMREAD_UNCHANGED`
# when the former attribute does not exist.
if hasattr(cv2, 'CV_LOAD_IMAGE_UNCHANGED'):
    IMREAD_UNCHANGED = cv2.CV_LOAD_IMAGE_UNCHANGED
else:
    IMREAD_UNCHANGED = cv2.IMREAD_UNCHANGED
def init_cityscapes(cityscapes_root):
    '''
    Append the right path to `sys.path` in order to use the provided cityscapes scripts.

    The directory `<cityscapes_root>/cityscapesScripts/scripts/helpers` is appended
    to `sys.path`. It is appended at most once, so calling this function repeatedly
    does not grow `sys.path`.
    - `cityscapes_root` the root folder of the dataset. Make sure to clone the cityscapesScripts there too:
      https://github.com/mcordts/cityscapesScripts
    '''
    import sys

    scripts_dir = os.path.join(cityscapes_root, 'cityscapesScripts', 'scripts')
    helpers_dir = os.path.join(scripts_dir, 'helpers')
    # Guard against duplicates so repeated initialisation stays idempotent.
    if helpers_dir not in sys.path:
        sys.path.append(helpers_dir)
def image_names(cityscapes_root, subset, citynames=False):
    '''
    Retrieve all image filenames for a specific subset from the cityscape dataset.

    The images are looked up in `<cityscapes_root>/leftImg8bit/<subset>/<city>/*.png`.
    Cities and files are visited in sorted order so the result is reproducible
    regardless of the order in which the filesystem lists directory entries.
    - `cityscapes_root` the root folder of the dataset.
    - `subset` the subset to be loaded can be one of `train`, `test`, `val`, `train_extra`.
    - `citynames` if True, additionally return the city (sub-folder) name of each image.

    Returns the list of image paths, or a tuple `(image paths, city names)` of two
    equally long lists when `citynames` is True.
    '''
    image_folder = os.path.join(cityscapes_root, 'leftImg8bit', subset)
    inames = []
    cnames = []

    # Get all the images in the per-city subfolders.
    for city in sorted(os.listdir(image_folder)):
        city_folder = os.path.join(image_folder, city)
        # Stray files next to the city folders would make `os.listdir` fail.
        if not os.path.isdir(city_folder):
            continue
        for fname in sorted(os.listdir(city_folder)):
            if fname.endswith('.png'):
                inames.append(os.path.join(city_folder, fname))
                cnames.append(city)

    return (inames, cnames) if citynames else inames
def load_images(image_names, downscale_factor=1):
    '''
    Read a list of image files into one array, optionally shrinking every image.
    - `image_names` the list of image names to load, as returned by `image_names`.
    - `downscale_factor` the factor with which the images will be downscaled.

    The output size is fixed to (1024 // factor, 2048 // factor).
    Returns the images in an uint8 array of shape (N,3,H,W).
    Raises `ValueError` if an image cannot be read.
    '''
    out_h = 1024 // downscale_factor
    out_w = 2048 // downscale_factor
    batch = np.empty((len(image_names), 3, out_h, out_w), np.uint8)
    needs_resize = downscale_factor != 1

    for idx, path in enumerate(image_names):
        bgr = cv2.imread(path)
        if bgr is None:
            raise ValueError("Couldn't load image {}".format(path))
        if needs_resize:
            bgr = cv2.resize(bgr, (out_w, out_h), interpolation=cv2.INTER_AREA)
        # Reverse the channel axis (BGR to RGB) and move it first (HWC to CHW).
        batch[idx] = bgr[:, :, ::-1].transpose(2, 0, 1)

    return batch
def downscale_labels(labels, f, threshold, dtype=np.int8):
    '''
    Downscale a label image. Each `fy`x`fx` window will be mapped to a single pixel.
    If the majority label does not have a percentage over the `threshold` the pixel will
    be mapped to -1. Pixels labelled -1 ("don't know") neither vote nor count towards
    the total the percentage is computed from.
    - `labels` the input labels, a 2D integer array with values >= -1.
    - `f` the factor with which the images will be downscaled. Can be an integer or a (y,x) tuple.
    - `threshold` the required part of the majority be a valid label [0.0, 1.0].
    - `dtype` the datatype of the returned label array. The default (int8) allows labels up to 127.

    Returns an array of shape (H//fy, W//fx) and type `dtype`.
    Raises `AssertionError` if the image size is not divisible by the factor or
    if a label below -1 is present.
    '''
    fy, fx = f if isinstance(f, (list, tuple)) else (f, f)
    H, W = labels.shape
    assert (H % fy) == 0 and (W % fx) == 0, "image size must be divisible by factor!"
    h, w = H // fy, W // fx

    # Convert to Python ints: `M + 2` below must not be evaluated in the
    # (possibly 8-bit) dtype of `labels`, where it can overflow.
    m = int(np.min(labels))
    M = int(np.max(labels))
    assert -1 <= m, 'Labels should not have values below -1'

    # A uniformly labelled image (seen in the coarse annotations) needs no
    # voting; it also covers the all -1 case, for which no valid label slot
    # would be left below.
    if m == M:
        return np.full((h, w), m, dtype)

    # Count the number of occurrences of the labels in each "fy x fx cell".
    # Slots 0..M hold the valid labels; label -1 indexes the extra last slot.
    label_sums = np.zeros((h, w, M + 2))
    mx, my = np.meshgrid(np.arange(w), np.arange(h))
    for dy in range(fy):
        for dx in range(fx):
            label_sums[my, mx, labels[dy::fy, dx::fx]] += 1
    label_sums = label_sums[:, :, :-1]  # "Don't know" don't count

    # Use the highest-occurrence label
    new_labels = np.argsort(label_sums, 2)[:, :, -1].astype(dtype)

    # But turn "uncertain" cells into "don't know" label.
    counts = label_sums[my, mx, new_labels]
    hit_counts = np.sum(label_sums, 2) * threshold
    new_labels[counts <= hit_counts] = -1
    return new_labels
def upsample(im, factor):
    """
    Upsample the two last axes of `im` by the integer `factor`.

    Every element is repeated `factor` times along the last axis and then
    `factor` times along the second to last axis.
    """
    widened = np.repeat(im, factor, axis=-1)
    return np.repeat(widened, factor, axis=-2)
def load_labels(image_names, fine=True, preprocess=None):
    '''
    Load the ground truth label images belonging to a set of rgb image names.
    - `image_names` the rgb image names for which the ground truth labels should be loaded.
    - `fine` whether to load the fine labels (True), or the coarse labels (False).
      Fine labels are only available for a subset.
    - `preprocess` is a function which, given a single label image returns a new label image.
      For example, you can use `lambda x: downscale_labels(x, 8, 0.5)`.

    Returns an array of shape (N,H,W) whose size and dtype are taken from the first
    (preprocessed) label image, or `None` if `image_names` is empty.
    Raises `ValueError` if a label image cannot be read.
    '''
    # Needed for the label definitions from CS.
    import labels as cs_labels

    # Lookup table from the stored label ids to training ids; train id 255 becomes -1.
    train_ids = [-1 if lbl.trainId == 255 else lbl.trainId for lbl in cs_labels.labels]
    label_map = np.asarray(train_ids, dtype=np.int8)

    gt_kind = 'gtFine' if fine else 'gtCoarse'
    stacked = None

    for idx, rgb_name in enumerate(image_names):
        # Derive the label file name: the first 'leftImg8bit' becomes the
        # ground truth kind, any remaining one becomes '<kind>_labelIds'
        # (presumably the folder and the file name suffix respectively).
        gt_name = rgb_name.replace('leftImg8bit', gt_kind, 1)
        gt_name = gt_name.replace('leftImg8bit', gt_kind + '_labelIds')

        ids = cv2.imread(gt_name, IMREAD_UNCHANGED)
        if ids is None:
            raise ValueError("Couldn't load image {}".format(gt_name))

        mapped = label_map[ids]
        if preprocess is not None:
            mapped = preprocess(mapped)

        # The first image determines the size of the output array for all.
        # This allows preprocessing to fully determine size and dtype.
        if stacked is None:
            H, W = mapped.shape
            stacked = np.empty((len(image_names), H, W), mapped.dtype)
        stacked[idx] = mapped

    return stacked
def label_names():
    '''
    Return the label names of the train ids 0 to 18 in order, followed by 'ignore'.

    The names are taken from the `labels` module of the cityscapes scripts, which
    must be importable (see `init_cityscapes`).
    '''
    from labels import labels

    names_by_train_id = {}
    for label in labels:
        names_by_train_id[label.trainId] = label.name
    # Both "unknown" train ids share one name.
    names_by_train_id[255] = 'ignore'
    names_by_train_id[-1] = 'ignore'

    known = [names_by_train_id[train_id] for train_id in range(19)]
    return known + [names_by_train_id[-1]]
def translate(imgs, mapping, remaining=-1, output=None):
    """
    Given an array `imgs` and an iterable of pairs `(a,b)` in `mapping`,
    this translates all `a`s present in `imgs` into `b`s and all remaining
    entries into `remaining`s. Optionally write the result into the existing
    `output` array.

    Lookups are always done against the original values of `imgs`, so the
    pairs do not chain (`(1,2),(2,3)` maps 1 to 2, not to 3). This also holds
    when `output` is `imgs` itself or overlaps it, i.e. for in-place use.
    - `imgs` the array (or array-like) of values to translate.
    - `mapping` iterable of `(source, destination)` pairs.
    - `remaining` the value for entries not matched by any pair.
    - `output` optional array of the same shape as `imgs` to write into.

    Returns `output`, or a new array like `imgs` if `output` is None.
    """
    # Lists and the like would compare as a whole (`[..] == src` is a plain
    # bool), matching nothing; compare element-wise instead.
    imgs = np.asarray(imgs)

    if output is None:
        output = np.full_like(imgs, remaining)
    else:
        # Filling `output` must not wipe the input before it has been read.
        if isinstance(output, np.ndarray) and np.may_share_memory(imgs, output):
            imgs = imgs.copy()
        output[:] = remaining

    for src, dst in mapping:
        output[imgs == src] = dst

    return output
def id2trainId(im, unk=-1):
    '''
    Map an integer array of label ids to train ids (0 to 18).
    - `im` integer array of label ids. Ids without a train id, any id of 34 or
      above (e.g. 255) and the id -1 are mapped to `unk`.
    - `unk` the value used for ids without a train id.

    Returns an array shaped like `im`. Its dtype is int8, unless `unk` does not
    fit into int8 (e.g. 255), in which case the smallest dtype holding both
    int8 and `unk` is used.
    '''
    # NOTE(review): hard-coded id -> train id table, presumably mirroring the
    # label definitions of the cityscapes scripts; confirm if those change.
    int8_range = np.iinfo(np.int8)
    table_dtype = np.int8
    if not (int8_range.min <= unk <= int8_range.max):
        # Widen instead of wrapping/overflowing when storing `unk`.
        table_dtype = np.promote_types(np.int8, np.min_scalar_type(unk))

    mapping = np.array([unk, unk, unk, unk, unk, unk, unk, 0, 1, unk,
                        unk, 2, 3, 4, unk, unk, unk, 5, unk, 6,
                        7, 8, 9, 10, 11, 12, 13, 14, 15, unk,
                        unk, 16, 17, 18, unk], dtype=table_dtype)

    # Clamping sends every too large id (e.g. 255) to the trailing `unk` entry;
    # the id -1 reaches the same entry through negative indexing.
    return mapping[np.minimum(im, len(mapping) - 1)]
def trainId2id(im, unk=0):
    '''
    Map an integer array of train ids (0 to 18) back to label ids.
    - `im` integer array of train ids. Any value of 19 or above (e.g. 255) and
      the value -1 are mapped to `unk`.
    - `unk` the label id used for values that are not a train id.

    Returns an array shaped like `im`. Its dtype is uint8, unless `unk` does not
    fit into uint8 (e.g. -1), in which case the smallest dtype holding both
    uint8 and `unk` is used.
    '''
    # NOTE(review): hard-coded inverse of the table in `id2trainId`.
    uint8_range = np.iinfo(np.uint8)
    table_dtype = np.uint8
    if not (uint8_range.min <= unk <= uint8_range.max):
        # Widen instead of wrapping/overflowing when storing `unk`.
        table_dtype = np.promote_types(np.uint8, np.min_scalar_type(unk))

    mapping = np.array([7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                        23, 24, 25, 26, 27, 28, 31, 32, 33, unk], dtype=table_dtype)

    # Clamping sends every too large value (e.g. 255) to the trailing `unk`
    # entry; the value -1 reaches the same entry through negative indexing.
    return mapping[np.minimum(im, len(mapping) - 1)]