cleaning up

misslibra · Mar 11, 2019 · ff4f930 · ff4f930
1 parent 671b672
commit ff4f930
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 62 deletions.
diff --git a/Run.py b/Run.py
@@ -48,7 +48,7 @@ def main():
     else:
         print('Using previous model %s'%model_lst[-1])
         my_vgg = vgg.vgg19_bn(pretrained=True)
-        #TODO model in Cuda throws an error
+        # TODO: load bins from file or something
         model = Model.Model(features=my_vgg.features, bins=2).cuda()
         checkpoint = torch.load(weights_path + '/%s'%model_lst[-1])
         model.load_state_dict(checkpoint['model_state_dict'])
@@ -64,15 +64,25 @@ def main():
     angle_bins = generate_bins(2)
 
     img_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/image_2/'
+
+    # using P from each frame
     calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'
+
+    # using P_rect from global calibration file
+    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/camera_cal/'
+    # calib_file = calib_path + "calib_cam_to_cam.txt"
+
     ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
 
     for id in ids:
-        
+
         start_time = time.time()
 
         img_file = img_path + id + ".png"
+
+        # P for each frame
         calib_file = calib_path + id + ".txt"
+
         truth_img = cv2.imread(img_file)
         img = np.copy(truth_img)
         yolo_img = np.copy(truth_img)
@@ -93,7 +103,7 @@ def main():
 
             theta_ray = object.theta_ray
             input_img = object.img
-            K = object.K
+            proj_matrix = object.proj_matrix
             box_2d = detection.box_2d
             detected_class = detection.detected_class
 
@@ -115,7 +125,7 @@ def main():
             alpha += angle_bins[argmax]
             alpha -= np.pi
 
-            location = plot_regressed_3d_bbox(img, truth_img, K, box_2d, dim, alpha, theta_ray)
+            location = plot_regressed_3d_bbox(img, truth_img, proj_matrix, box_2d, dim, alpha, theta_ray)
 
             print('Estimated pose: %s'%location)
 

diff --git a/Run_no_yolo.py b/Run_no_yolo.py
@@ -58,10 +58,10 @@ def main():
     weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
     model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
     if len(model_lst) == 0:
-        print 'No previous model found, please train first!'
+        print('No previous model found, please train first!')
         exit()
     else:
-        print 'Using previous model %s'%model_lst[-1]
+        print ('Using previous model %s'%model_lst[-1])
         my_vgg = vgg.vgg19_bn(pretrained=True)
         #TODO model in Cuda throws an error
         model = Model.Model(features=my_vgg.features, bins=2)
@@ -107,9 +107,9 @@ def main():
 
             location = plot_regressed_3d_bbox(img, truth_img, cam_to_img, label['Box_2D'], dim, alpha, theta_ray)
 
-            print 'Estimated pose: %s'%location
-            print 'Truth pose: %s'%label['Location']
-            print '-------------'
+            print('Estimated pose: %s'%location)
+            print('Truth pose: %s'%label['Location'])
+            print('-------------')
 
             # plot car by car
             if single_car:

diff --git a/library/File.py b/library/File.py
@@ -1,25 +1,30 @@
+"""
+Functions to read from files
+TODO: move the functions that read label from Dataset into here
+"""
 import numpy as np
 
-# read camera cal file and get intrinsic params
-# this is actually the projection matrix
+
 def get_calibration_cam_to_image(cab_f):
     for line in open(cab_f):
         if 'P2:' in line:
             cam_to_img = line.strip().split(' ')
             cam_to_img = np.asarray([float(number) for number in cam_to_img[1:]])
             cam_to_img = np.reshape(cam_to_img, (3, 4))
-            # cam_to_img[:,3] = 1
             return cam_to_img
 
-def get_K(cab_f):
+    file_not_found(cab_f)
+
+def get_P(cab_f):
     for line in open(cab_f):
-        if 'K_02' in line:
-            cam_K = line.strip().split(' ')
-            cam_K = np.asarray([float(cam_K) for cam_K in cam_K[1:]])
+        if 'P_rect_02' in line:
+            cam_P = line.strip().split(' ')
+            cam_P = np.asarray([float(cam_P) for cam_P in cam_P[1:]])
             return_matrix = np.zeros((3,4))
-            return_matrix[:,:-1] = cam_K.reshape((3,3))
+            return_matrix = cam_P.reshape((3,4))
+            return return_matrix
 
-    return return_matrix
+    file_not_found(cab_f)
 
 def get_R0(cab_f):
     for line in open(cab_f):
@@ -46,3 +51,7 @@ def get_tr_to_velo(cab_f):
             Tr_to_velo[:3,:4] = Tr
 
             return Tr_to_velo
+
+def file_not_found(filename):
+    print("\nError! Can't read calibration file, does %s exist?"%filename)
+    exit()
diff --git a/library/Math.py b/library/Math.py
@@ -52,7 +52,7 @@ def create_corners(dimension, location=None, R=None):
 # this is based on the paper. Math!
 # calib is a 3x4 matrix, box_2d is [(xmin, ymin), (xmax, ymax)]
 # Math help: http://ywpkwon.github.io/pdf/bbox3d-study.pdf
-def calc_location(dimension, K, box_2d, alpha, theta_ray):
+def calc_location(dimension, proj_matrix, box_2d, alpha, theta_ray):
     #global orientation
     orient = alpha + theta_ray
     R = rotation_matrix(orient)
@@ -172,7 +172,7 @@ def calc_location(dimension, K, box_2d, alpha, theta_ray):
             RX = np.dot(R, X)
             M[:3,3] = RX.reshape(3)
 
-            M = np.dot(K, M)
+            M = np.dot(proj_matrix, M)
 
             A[row, :] = M[index,:3] - box_corners[row] * M[2,:3]
             b[row] = box_corners[row] * M[2,3] - M[index,3]

diff --git a/torch_lib/Dataset.py b/torch_lib/Dataset.py
@@ -29,9 +29,8 @@ def __init__(self, path, bins=2, overlap=0.1):
         self.top_calib_path = path + "/calib/"
         # use a relative path instead?
 
-        # TODO: cal vs. projection
-        # this one looks a little off
-        self.k = self.get_K(os.path.abspath(os.path.dirname(os.path.dirname(__file__)) + '/camera_cal/calib_cam_to_cam.txt'))
+        # TODO: which camera cal to use, per frame or global one?
+        self.proj_matrix = get_P(os.path.abspath(os.path.dirname(os.path.dirname(__file__)) + '/camera_cal/calib_cam_to_cam.txt'))
 
         self.ids = [x.split('.')[0] for x in sorted(os.listdir(self.top_img_path))] # name of file
         self.num_images = len(self.ids)
@@ -86,7 +85,8 @@ def __getitem__(self, index):
             self.curr_img = cv2.imread(self.top_img_path + '%s.png'%id)
 
         label = self.labels[id][str(line_num)]
-        obj = DetectedObject(self.curr_img, label['Class'], label['Box_2D'], self.k, label=label)
+        # P doesn't matter here
+        obj = DetectedObject(self.curr_img, label['Class'], label['Box_2D'], self.proj_matrix, label=label)
 
         return obj.img, label
 
@@ -119,16 +119,6 @@ def get_label(self, id, line_num):
 
         return label
 
-    def get_K(self, cab_f):
-        for line in open(cab_f, 'r'):
-            if 'K_02' in line:
-                cam_K = line.strip().split(' ')
-                cam_K = np.asarray([float(cam_K) for cam_K in cam_K[1:]])
-                return_matrix = np.zeros((3,4))
-                return_matrix[:,:-1] = cam_K.reshape((3,3))
-
-        return return_matrix
-
     def get_bin(self, angle):
 
         bin_idxs = []
@@ -233,19 +223,22 @@ def all_objects(self):
             img = cv2.imread(img_path)
             data[id]['Image'] = img
 
+            # using p per frame
             calib_path = self.top_calib_path + '%s.txt'%id
-            k = get_calibration_cam_to_image(calib_path)
-            data[id]['Calib'] = k
-            # data[id]['Calib'] = self.k
+            proj_matrix = get_calibration_cam_to_image(calib_path)
+
+            # using P_rect from global calib file
+            proj_matrix = self.proj_matrix
+
+            data[id]['Calib'] = proj_matrix
 
             label_path = self.top_label_path + '%s.txt'%id
             labels = self.parse_label(label_path)
             objects = []
             for label in labels:
                 box_2d = label['Box_2D']
                 detection_class = label['Class']
-                objects.append(DetectedObject(img, detection_class, box_2d, k, label=label))
-                # objects.append(DetectedObject(img, detection_class, box_2d, self.k, label=label))
+                objects.append(DetectedObject(img, detection_class, box_2d, proj_matrix, label=label))
 
             data[id]['Objects'] = objects
 
@@ -258,31 +251,21 @@ def all_objects(self):
 is to keep this abstract enough so it can be used in combination with YOLO
 """
 class DetectedObject:
-    def __init__(self, img, detection_class, box_2d, K, label=None):
+    def __init__(self, img, detection_class, box_2d, proj_matrix, label=None):
 
-        if isinstance(K, str): # filename
-            K = get_calibration_cam_to_image(K)
+        if isinstance(proj_matrix, str): # filename
+            # proj_matrix = get_P(proj_matrix)
+            proj_matrix = get_calibration_cam_to_image(proj_matrix)
 
-        self.K = K
-        self.theta_ray = self.calc_theta_ray(img, box_2d, K)
+        self.proj_matrix = proj_matrix
+        self.theta_ray = self.calc_theta_ray(img, box_2d, proj_matrix)
         self.img = self.format_img(img, box_2d)
         self.label = label
         self.detection_class = detection_class
 
-
-    def get_K(self, cab_f):
-        for line in open(cab_f, 'r'):
-            if 'K_02' in line:
-                cam_K = line.strip().split(' ')
-                cam_K = np.asarray([float(cam_K) for cam_K in cam_K[1:]])
-                return_matrix = np.zeros((3,4))
-                return_matrix[:,:-1] = cam_K.reshape((3,3))
-
-        return return_matrix
-
-    def calc_theta_ray(self, img, box_2d, K):
+    def calc_theta_ray(self, img, box_2d, proj_matrix):
         width = img.shape[1]
-        fovx = 2 * np.arctan(width / (2 * K[0][0]))
+        fovx = 2 * np.arctan(width / (2 * proj_matrix[0][0]))
         center = (box_2d[1][0] + box_2d[0][0]) / 2
         dx = center - (width / 2)
 
@@ -297,8 +280,10 @@ def calc_theta_ray(self, img, box_2d, K):
 
     def format_img(self, img, box_2d):
 
+        # Should this happen? or does normalize take care of it. YOLO doesnt like
         # img=img.astype(np.float) / 255
 
+        # torch transforms
         normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
         process = transforms.Compose ([
@@ -307,16 +292,12 @@ def format_img(self, img, box_2d):
         ])
 
         # crop image
-
         pt1 = box_2d[0]
         pt2 = box_2d[1]
         crop = img[pt1[1]:pt2[1]+1, pt1[0]:pt2[0]+1]
         crop = cv2.resize(src = crop, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
 
-
-        # cv2.imshow('hello', crop) # to see the input cropped section
-        # cv2.waitKey(0)
-
         # recolor, reformat
         batch = process(crop)
+
         return batch