def inference(image_name):
    """
    detect objects on one image and draw the detections on it

    Uses the module-level `ssd` (SSD300 wrapper), which must be
    created before this function is called.

    Args: image file name, looked up under ./voc2007/
    Return: image resized back to its original width and height, with a
            rectangle and the integer label drawn for each detection
    Raises: ValueError if image_name is None
    """
    if image_name is None:
        # raise instead of returning the exception object: a returned
        # Exception would be handed to cv2.imwrite / cv2.imshow by the caller
        raise ValueError('not specified image name to be drawed')

    font_type = cv2.FONT_HERSHEY_SIMPLEX
    color = (0, 0, 255)

    img, w, h, _ = preprocess('./voc2007/'+image_name)
    pred_confs, pred_locs = ssd.infer(images=[img])
    locs, labels = ssd.ssd.detect_objects(pred_confs, pred_locs)
    img = deprocess(img, w, h)

    # zip yields nothing when either sequence is empty, so no extra guard
    for label, loc in zip(labels, locs):
        loc = center2corner(loc)
        loc = convert2diagonal_points(loc)

        # box coordinates are scaled by the original width / height
        # (presumably they are normalized to [0, 1] -- confirm in util)
        top_left = (int(loc[0]*w), int(loc[1]*h))
        bottom_right = (int(loc[2]*w), int(loc[3]*h))

        cv2.rectangle(img, top_left, bottom_right, color, 1)
        cv2.putText(img, str(int(label)), top_left, font_type, 0.7, color, 1)

    return img
# detect objects on a specified image.
# usage: python inference.py <image file name under ./voc2007/>
if 2 == len(sys.argv):
    import os

    # tensorflow session
    # `ssd` has to stay a module-level name because inference() reads it
    sess = tf.Session()
    ssd = SSD300(sess)
    sess.run(tf.global_variables_initializer())

    # parameter saver: overwrite the fresh initial values with the checkpoint
    saver = tf.train.Saver()
    saver.restore(sess, './checkpoints/params.ckpt')

    img = inference(sys.argv[1])

    # cv2.imwrite does not create missing directories, so make sure the
    # output directory exists before saving the annotated image
    if not os.path.exists('./evaluated'):
        os.mkdir('./evaluated')
    cv2.imwrite('./evaluated/'+sys.argv[1], img)

    # show the result until a key is pressed
    cv2.namedWindow("img", cv2.WINDOW_NORMAL)
    cv2.imshow("img", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    sys.exit()
class SSD300:
    """
    SSD wrapper class whose input size is 300x300.

    Holds the tensorflow session, the input placeholder, the prediction
    tensors, the loss tensors with their placeholders, the optimizer step
    and the matcher used to build the loss inputs.
    """

    def __init__(self, sess):
        """
        initialize SSD model as SSD300 whose input size is 300x300

        Args: tensorflow session used for every run call of this wrapper
        """
        self.sess = sess

        # define input placeholder and initialize ssd instance
        self.input = tf.placeholder(shape=[None, 300, 300, 3], dtype=tf.float32)
        self.ssd = SSD()

        # build ssd network => feature-maps and confs and locs tensor is returned
        fmaps, confs, locs = self.ssd.build(self.input, is_training=True)

        # zip running set of tensor
        self.pred_set = [fmaps, confs, locs]

        # required param from default-box and loss function
        fmap_shapes = [fmap.get_shape().as_list() for fmap in fmaps]
        self.dboxes = generate_boxes(fmap_shapes)
        print(len(self.dboxes))

        # required placeholder for loss
        loss, loss_conf, loss_loc, self.pos, self.neg, self.gt_labels, self.gt_boxes = self.ssd.loss(len(self.dboxes))
        self.train_set = [loss, loss_conf, loss_loc]
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam')
        self.train_step = optimizer.minimize(loss)

        # provides matching method
        self.matcher = Matcher(fmap_shapes, self.dboxes)

    # inference process
    def infer(self, images):
        """
        run the network forward on a batch of images

        Args: batch of images fed to the 300x300x3 input placeholder
        Return: predicted confidences, predicted locations
        """
        # the feature maps are fetched as part of pred_set but not needed here
        _, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})
        return pred_confs, pred_locs

    # training process
    def train(self, images, actual_data):
        """
        run one optimization step on a mini batch

        Args:
            images: batch of images fed to the input placeholder
            actual_data: per image, a list of ground truth objects; the first
                four values of an object are its box and the argmax of the
                remaining values is taken as its label
        Return: pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
                (predictions are the ones computed before the update)
        """
        positives = []
        negatives = []
        ex_gt_labels = []
        ex_gt_boxes = []

        # predictions are required first because matching consumes them
        _, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})

        # matching method works with only one image, so it is called per image
        for confs, locs, objects in zip(pred_confs, pred_locs, actual_data):
            # extract ground truth info
            actual_labels = [np.argmax(obj[4:]) for obj in objects]
            # transform location for voc2007
            actual_locs = [corner2center(convert2wh(obj[:4])) for obj in objects]

            pos_list, neg_list, t_gtl, t_gtb = self.matcher.matching(confs, locs, actual_labels, actual_locs)
            positives.append(pos_list)
            negatives.append(neg_list)
            ex_gt_labels.append(t_gtl)
            ex_gt_boxes.append(t_gtb)

        feed = {
            self.input: images,
            self.pos: positives,
            self.neg: negatives,
            self.gt_labels: ex_gt_labels,
            self.gt_boxes: ex_gt_boxes,
        }

        # fetch the losses together with the optimizer step: both used the
        # identical feed, so a single run avoids evaluating the graph twice
        batch_loss, batch_conf, batch_loc, _ = \
            self.sess.run(self.train_set + [self.train_step], feed_dict=feed)

        return pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
img, _, _, _, = preprocess('voc2007/'+keys[idx]) actual_data.append(data[keys[idx]]) mini_batch.append(img) @@ -65,33 +63,6 @@ def next_batch(): buff.append((mini_batch, actual_data)) - def draw_marker(image_name, save): - if image_name is None: - return Exception('not specified image name to be drawed') - - img = cv2.imread('./voc2007/'+image_name, 1) - h = img.shape[0] - w = img.shape[1] - fontType = cv2.FONT_HERSHEY_SIMPLEX - reshaped = cv2.resize(img, (300, 300)) - reshaped = reshaped / 255 - pred_confs, pred_locs = ssd.eval(images=[reshaped], actual_data=None, is_training=False) - locs, labels = ssd.ssd.detect_objects(pred_confs, pred_locs) - if len(labels) and len(locs): - for label, loc in zip(labels, locs): - loc = center2corner(loc) - loc = convert2diagonal_points(loc) - cv2.rectangle(img, (int(loc[0]*w), int(loc[1]*h)), (int(loc[2]*w), int(loc[3]*h)), (0, 0, 255), 1) - cv2.putText(img, str(int(label)), (int(loc[0]*w), int(loc[1]*h)), fontType, 0.7, (0, 0, 255), 1) - - if save: - if not os.path.exists('./evaluated'): - os.mkdir('./evaluated') - cv2.imwrite('./evaluated/'+image_name, img) - - return img - - # tensorflow session ssd = SSD300(sess) sess.run(tf.global_variables_initializer()) @@ -99,16 +70,6 @@ def draw_marker(image_name, save): # parameter saver saver = tf.train.Saver() - # eval and predict object on a specified image. 
def preprocess(path):
    """
    load specified image

    Args: image path
    Return: resized image (300x300, float32, scaled to [0, 1], channel
            order reversed), original width, original height, channel count
    """
    # force three channels: a grayscale file would otherwise yield a 2-D
    # array and break the (h, w, c) unpacking below
    img = imread(path, mode='RGB')
    h, w, c = img.shape
    img = imresize(img, (300, 300))
    # reverse the channel axis (RGB -> BGR) and move to float for scaling
    img = img[:, :, ::-1].astype('float32')
    img /= 255.
    return img, w, h, c


def deprocess(x, w, h):
    """
    restore processed image

    Args: processed image, original width, original height
    Return: restored image as uint8 resized to (h, w); the channel order
            is left exactly as preprocess produced it
    """
    # scale into a new array: an in-place `x *= 255.` would also rescale
    # the array still held by the caller
    x = x * 255.
    x = np.clip(x, 0, 255).astype('uint8')
    return imresize(x, (h, w))