-
Notifications
You must be signed in to change notification settings - Fork 3
/
3DVision.py
219 lines (187 loc) · 9.47 KB
/
3DVision.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image
from ipywidgets import interact, interactive, fixed
import glob
from yolov4.tf import YOLOv4
import tensorflow as tf
import time
yolo = YOLOv4(tiny=True)
yolo.classes = "Yolov4/coco.names"
yolo.make_model()
yolo.load_weights("Yolov4/yolov4-tiny.weights", weights_type="yolo")
def add_depth(depth_list, result, pred_bboxes):
h, w, _ = result.shape
res = result.copy()
for i, distance in enumerate(depth_list):
cv2.line(res,(int(pred_bboxes[i][0]*w - pred_bboxes[i][2]*w/2),int(pred_bboxes[i][1]*h - pred_bboxes[i][3]*h*0.5)),(int(pred_bboxes[i][0]*w + pred_bboxes[i][2]*w*0.5),int(pred_bboxes[i][1]*h + pred_bboxes[i][3]*h*0.5)),(255,255,255),2)
cv2.line(res,(int(pred_bboxes[i][0]*w + pred_bboxes[i][2]*w/2),int(pred_bboxes[i][1]*h - pred_bboxes[i][3]*h*0.5)),(int(pred_bboxes[i][0]*w - pred_bboxes[i][2]*w*0.5),int(pred_bboxes[i][1]*h + pred_bboxes[i][3]*h*0.5)),(255,255,255),2)
cv2.putText(res, '{0:.2f} m'.format(distance), (int(pred_bboxes[i][0]*w - pred_bboxes[i][2]*w*0.5),int(pred_bboxes[i][1]*h)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 100), 2, cv2.LINE_AA)
return res
res = add_depth(depth_list, result, pred_bboxes)
plt.figure(figsize = (40,20))
plt.imshow(res)
#cv2.imwrite("output/result.png", cv2.cvtColor(res, cv2.COLOR_BGR2RGB))
def find_distances(depth_map, pred_bboxes, img, method="center"):
"""
Go through each bounding box and take a point in the corresponding depth map.
It can be:
* The Center of the box
* The average value
* The minimum value (closest point)
* The median of the values
"""
depth_list = []
h, w, _ = img.shape
for box in pred_bboxes:
x1 = int(box[0]*w - box[2]*w*0.5) # center_x - width /2
y1 = int(box[1]*h-box[3]*h*0.5) # center_y - height /2
x2 = int(box[0]*w + box[2]*w*0.5) # center_x + width/2
y2 = int(box[1]*h+box[3]*h*0.5) # center_y + height/2
#print(np.array([x1, y1, x2, y2]))
obstacle_depth = depth_map[y1:y2, x1:x2]
if method=="closest":
depth_list.append(obstacle_depth.min()) # take the closest point in the box
elif method=="average":
depth_list.append(np.mean(obstacle_depth)) # take the average
elif method=="median":
depth_list.append(np.median(obstacle_depth)) # take the median
else:
depth_list.append(depth_map[int(box[1]*h)][int(box[0]*w)]) # take the center
return depth_list
depth_list = find_distances(depth_map_left, pred_bboxes, img_left, method="center")
print(depth_list)
def run_obstacle_detection(img):
start_time=time.time()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
resized_image = yolo.resize_image(img)
# 0 ~ 255 to 0.0 ~ 1.0
resized_image = resized_image / 255.
#input_data == Dim(1, input_size, input_size, channels)
input_data = resized_image[np.newaxis, ...].astype(np.float32)
candidates = yolo.model.predict(input_data)
_candidates = []
result = img.copy()
for candidate in candidates:
batch_size = candidate.shape[0]
grid_size = candidate.shape[1]
_candidates.append(tf.reshape(candidate, shape=(1, grid_size * grid_size * 3, -1)))
#candidates == Dim(batch, candidates, (bbox))
candidates = np.concatenate(_candidates, axis=1)
#pred_bboxes == Dim(candidates, (x, y, w, h, class_id, prob))
pred_bboxes = yolo.candidates_to_pred_bboxes(candidates[0], iou_threshold=0.35, score_threshold=0.40)
pred_bboxes = pred_bboxes[~(pred_bboxes==0).all(1)] #https://stackoverflow.com/questions/35673095/python-how-to-eliminate-all-the-zero-rows-from-a-matrix-in-numpy?lq=1
pred_bboxes = yolo.fit_pred_bboxes_to_original(pred_bboxes, img.shape)
exec_time = time.time() - start_time
#print("time: {:.2f} ms".format(exec_time * 1000))
result = yolo.draw_bboxes(img, pred_bboxes)
return result, pred_bboxes
def get_calibration_parameters(file):
with open(file, 'r') as f:
fin = f.readlines()
for line in fin:
if line[:2] == 'P2':
p_left = np.array(line[4:].strip().split(" ")).astype('float32').reshape(3,-1)
elif line[:2] == 'P3':
p_right = np.array(line[4:].strip().split(" ")).astype('float32').reshape(3,-1)
elif line[:7] == 'R0_rect':
p_ro_rect = np.array(line[9:].strip().split(" ")).astype('float32').reshape(3,-1)
elif line[:14] == 'Tr_velo_to_cam':
p_velo_to_cam = np.array(line[16:].strip().split(" ")).astype('float32').reshape(3,-1)
elif line[:14] == 'Tr_imu_to_velo':
p_imu_to_velo = np.array(line[16:].strip().split(" ")).astype('float32').reshape(3,-1)
return p_left, p_right, p_ro_rect, p_velo_to_cam, p_imu_to_velo
def decompose_projection_matrix(p):
k, r, t, _, _, _, _ = cv2.decomposeProjectionMatrix(p)
t = t / t[3] # Back from homogeneous
return k, r, t
def calc_depth_map(disp_left, k_left, t_left, t_right):
# Get the focal length from the K matrix
f = k_left[0, 0]
# Get the distance between the cameras from the t matrices (baseline)
b = abs(t_left[0] - t_right[0]) #On the setup page, you can see 0.54 as the distance between the two color cameras (http://www.cvlibs.net/datasets/kitti/setup.php)
# Replace all instances of 0 and -1 disparity with a small minimum value (to avoid div by 0 or negatives)
disp_left[disp_left == 0] = 0.1
disp_left[disp_left == -1] = 0.1
# Initialize the depth map to match the size of the disparity map
depth_map = np.ones(disp_left.shape, np.single)
# Calculate the depths
depth_map[:] = f * b / disp_left[:]
return depth_map
def compute_disparity(image, img_pair, num_disparities=6*16, block_size=11, window_size=6, matcher="stereo_sgbm", show_disparity=True):
"""
Create a Stereo BM or Stereo SGBM Matcher
Compute the Matching
Display the disparity image
Return it
"""
if matcher == "stereo_bm":
new_image = cv2.StereoBM_create(numDisparities=num_disparities,blockSize=block_size)
elif matcher == "stereo_sgbm":
'''
Understand parameters: https://docs.opencv.org/3.4/d2/d85/classcv_1_1StereoSGBM.html
'''
new_image = cv2.StereoSGBM_create(minDisparity=0, numDisparities=num_disparities, blockSize=block_size, P1=8 * 3 * window_size ** 2,
P2=32 * 3 * window_size ** 2, mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
new_image = new_image.compute(image, img_pair).astype(np.float32)/16
if (show_disparity==True):
plt.figure(figsize = (40,20))
plt.imshow(new_image, cmap="cividis")
plt.show()
return new_image
images_L = sorted(glob.glob("data/left/*.png"))
images_R = sorted(glob.glob("data/right/*.png"))
labels = sorted(glob.glob("data/labels/*.txt"))
calib_files = sorted(glob.glob("data/calib/*.txt"))
print("There are",len(images_L),"images")
index = 2
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.imshow(cv2.cvtColor(cv2.imread(images_L[index]), cv2.COLOR_BGR2RGB))
ax1.set_title('Image Left', fontsize=30)
ax2.imshow(cv2.cvtColor(cv2.imread(images_R[index]), cv2.COLOR_BGR2RGB))
ax2.set_title('Image Right', fontsize=30)
"""
NUM_DISPARITIES:
the disparity search range.
For each pixel algorithm will find the best disparity from 0 (default minimum disparity) to numDisparities.
The search range can then be shifted by changing the minimum disparity.
--> The value is always greater than zero. In the current implementation, this parameter must be divisible by 16.
"""
num_d = (0,512,16)
"""
BLOCK SIZE: the linear size of the blocks compared by the algorithm.
Matched block size. It must be an odd number >=1 .
Normally, it should be somewhere in the 3..11 range.
--> Larger block size implies smoother, though less accurate disparity map.
--> Smaller block size gives more detailed disparity map, but there is higher chance for algorithm to find a wrong correspondence.
"""
b_s = (1,19,2)
"""
WINDOW SIZE:
Default 3; 5; 7 for SGBM reduced size image; 15 for SGBM full size image (1300px and above); 5 Works nicely
"""
window_s = (1,13,2)
"""
MIN DISPARITY
min: Minimum possible disparity value.
Normally, it is zero but sometimes rectification algorithms can shift images, so this parameter needs to be adjusted accordingly.
max: has to be dividable by 16 f. E. HH 192, 256, default:
#
"""
#Reading the Left Images
img_left = cv2.imread(images_L[index]) #OpenCV reads in BGR
img_left_gray = cv2.cvtColor(img_left, cv2.COLOR_BGR2GRAY)
# Reading the right Images
img_right = cv2.imread(images_R[index]) #OpenCV reads in BGR
img_right_gray = cv2.cvtColor(img_right, cv2.COLOR_BGR2GRAY)
disparity_left = interactive(compute_disparity, image=fixed(img_left_gray), img_pair = fixed(img_right_gray), num_disparities=num_d, block_size=b_s, window_size=window_s, matcher=["stereo_sgbm", "stereo_bm"])
display(disparity_left)
disparity_right = compute_disparity(img_right_gray, img_pair=img_left_gray, num_disparities=disparity_left.kwargs["num_disparities"], block_size=disparity_left.kwargs["block_size"], window_size=disparity_left.kwargs["window_size"], matcher=disparity_left.kwargs["matcher"])
disparity_left = disparity_left.result
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.imshow(disparity_left, cmap="CMRmap_r") # or CMRmap_r
ax1.set_title('Disparity Left', fontsize=30)
ax2.imshow(disparity_right, cmap="CMRmap_r")
ax2.set_title('Disparity Right', fontsize=30)