Preliminary +``` +sudo apt install python3-dev python-catkin-tools python3-catkin-pkg-modules python3-rospkg-modules python3-empy python3-yaml +``` + +1. Set up catkin workspace and download this repo +``` +mkdir src && cd src +git clone +``` + +2. Download cv_bridge and configure it for Python3 (required by feature_extraction for using cv_bridge in Python3) +``` +git clone -b melodic +cd .. +# change the path in the following command according to your Python version +catkin config -DPYTHON_EXECUTABLE=/usr/bin/python3 -DPYTHON_INCLUDE_DIR=/usr/include/python3.6m -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/ +``` + +3. Build +``` +. /opt/ros/melodic/setup.bash +catkin build +``` + +4. Download one of the saved [HF-Net]( models from [here](, and unzip it. + +# Run + +### Feature extraction + +Start the feature extraction node, which will subscribe to one or more image topic(s) and publish the extracted image features on corresponding topic(s) with `/features` suffix. +``` +. protect latest_msgs + self.thread = threading.Thread(target=self.worker) + self.thread.start() + + def subscribe(self, topic): + output_topic = '/'.join(topic.split('/')[:-1]) + '/features' + self.publishers[topic] = rospy.Publisher(output_topic, ImageFeatures, queue_size=1) + with self.lock: + self.latest_msgs[topic] = None + callback = lambda msg: self.callback(msg, topic) + self.subscribers[topic] = rospy.Subscriber(topic, Image, callback, queue_size=1) + + def callback(self, msg, topic): + # keep only the latest message + with self.lock: + self.latest_msgs[topic] = msg + + def worker(self): + while not rospy.is_shutdown(): + no_new_msg = True + # take turn to process each topic + for topic in self.latest_msgs.keys(): + with self.lock: + msg = self.latest_msgs[topic] + self.latest_msgs[topic] = None + if msg is None: + rospy.loginfo_throttle(3, topic + ': no message received') + continue + self.process(msg, topic) + no_new_msg = False + if no_new_msg: time.sleep(0.01) + + def process(self, msg, 
topic): + start_time = time.time() + if msg.encoding == '8UC1' or msg.encoding == 'mono8': + image_gray = self.cv_bridge.imgmsg_to_cv2(msg) + if self.gui: image_color = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2BGR) + else: + image_color = self.cv_bridge.imgmsg_to_cv2(msg, 'bgr8') + image_gray = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY) + t2 = time.time() + features = + t3 = time.time() + if (features['keypoints'].shape[0] != 0): + feature_msg = features_to_ros_msg(features, msg) + self.publishers[topic].publish(feature_msg) + end_time = time.time() + rospy.loginfo(topic + ': %.2f | %.2f ms (%d keypoints)' % ( + (end_time-start_time) * 1000, + (t3 - t2) * 1000, + features['keypoints'].shape[0])) + if self.gui: + draw_keypoints(image_color, features['keypoints'], features['scores']) + cv2.imshow(topic, image_color) + cv2.waitKey(1) + +def draw_keypoints(image, keypoints, scores): + upper_score = 0.5 + lower_score = 0.1 + scale = 1 / (upper_score - lower_score) + for p,s in zip(keypoints, scores): + s = min(max(s - lower_score, 0) * scale, 1) + color = (255 * (1 - s), 255 * (1 - s), 255) # BGR +, tuple(p), 3, color, 2) + +def features_to_ros_msg(features, img_msg): + msg = ImageFeatures() + msg.header = img_msg.header + = False + for kp in features['keypoints']: + p = KeyPoint() + p.x = kp[0] + p.y = kp[1] + msg.keypoints.append(p) + msg.scores = features['scores'].flatten() + = features['local_descriptors'].flatten() + shape = features['local_descriptors'][0].shape + msg.descriptors.layout.dim.append(MultiArrayDimension()) + msg.descriptors.layout.dim[0].label = 'keypoint' + msg.descriptors.layout.dim[0].size = shape[0] + msg.descriptors.layout.dim[0].stride = shape[0] * shape[1] + msg.descriptors.layout.dim.append(MultiArrayDimension()) + msg.descriptors.layout.dim[1].label = 'descriptor' + msg.descriptors.layout.dim[1].size = shape[1] + msg.descriptors.layout.dim[1].stride = shape[1] + msg.global_descriptor = features['global_descriptor'][0] + return msg + +if 
__name__ == "__main__": + main() \ No newline at end of file diff --git a/feature_extraction/ b/feature_extraction/ new file mode 100644 index 0000000..f090fe3 --- /dev/null +++ b/feature_extraction/ @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +import tensorflow as tf +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.saved_model import tag_constants +import cv2 +import numpy as np +tf.contrib.resampler + +default_config = { + 'model_path': 'models/hfnet_tf', + 'keypoint_number': 500, + 'keypoint_threshold': 0.002, + 'nms_iterations': 1, + 'nms_radius': 1, +} + + +class FeatureNet: + def __init__(self, config=default_config): + self.graph = tf.Graph() + self.graph.as_default() + self.sess = tf.Session(graph=self.graph) + tf.saved_model.loader.load( + self.sess, + [tag_constants.SERVING], + config['model_path']) + self.net_image_in = self.graph.get_tensor_by_name('image:0') + self.net_scores = self.graph.get_tensor_by_name('scores:0') + self.net_logits = self.graph.get_tensor_by_name('logits:0') + self.net_local_desc = self.graph.get_tensor_by_name('local_descriptors:0') + self.net_global_decs = self.graph.get_tensor_by_name('global_descriptor:0') + self.keypoints, self.scores = self.select_keypoints( + self.net_scores, config['keypoint_number'], config['keypoint_threshold'], + config['nms_iterations'], config['nms_radius']) + # inverse ratio for upsampling (should be approx. 1/8) + self.scaling_op = ((tf.cast(tf.shape(self.net_local_desc)[1:3], tf.float32) - 1.) + / (tf.cast(tf.shape(self.net_image_in)[1:3], tf.float32) - 1.)) + # bicubic interpolation (upsample X8 to the image size) and L2-normalization + self.local_descriptors_op = \ + tf.nn.l2_normalize( + tf.contrib.resampler.resampler( + self.net_local_desc, + self.scaling_op[::-1] * tf.to_float(self.keypoints)), + -1) + + + def simple_nms(self, scores, iterations, radius): + """Performs non maximum suppression (NMS) on the heatmap using max-pooling. 
+ This method does not suppress contiguous points that have the same score. + It is an approximation of the standard NMS and uses iterative propagation. + Arguments: + scores: the score heatmap, with shape `[B, H, W]`. + radius: an integer scalar, the radius of the NMS window. + """ + if iterations < 1: return scores + with self.graph.as_default(): + with tf.name_scope('simple_nms'): + radius = tf.constant(radius, name='radius') + size = radius*2 + 1 + + max_pool = lambda x: gen_nn_ops.max_pool_v2( # supports dynamic ksize + x[..., None], ksize=[1, size, size, 1], + strides=[1, 1, 1, 1], padding='SAME')[..., 0] + zeros = tf.zeros_like(scores) + max_mask = tf.equal(scores, max_pool(scores)) + for _ in range(iterations-1): + supp_mask = tf.cast(max_pool(tf.to_float(max_mask)), tf.bool) + supp_scores = tf.where(supp_mask, zeros, scores) + new_max_mask = tf.equal(supp_scores, max_pool(supp_scores)) + max_mask = max_mask | (new_max_mask & tf.logical_not(supp_mask)) + return tf.where(max_mask, scores, zeros) + + + def select_keypoints(self, scores, keypoint_number, keypoint_threshold, nms_iterations, nms_radius): + with self.graph.as_default(): + scores = self.simple_nms(scores, nms_iterations, nms_radius) + with tf.name_scope('keypoint_extraction'): + keypoints = tf.where(tf.greater_equal( + scores[0], keypoint_threshold)) + scores = tf.gather_nd(scores[0], keypoints) + with tf.name_scope('top_k_keypoints'): + k = tf.constant(keypoint_number, name='k') + k = tf.minimum(tf.shape(scores)[0], k) + scores, indices = tf.nn.top_k(scores, k) + keypoints = tf.to_int32(tf.gather( + tf.to_float(keypoints), indices)) + keypoints, scores = keypoints[None], scores[None] + keypoints = keypoints[..., ::-1] # x-y convention + return keypoints, scores + + + def infer(self, image): + if len(image.shape) == 2: # grayscale + image_in = image[None,:,:,None] + else: + image_in = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)[None,:,:,None] + results = + [self.scores, # (1, num_keypoints) float32 + 
self.net_logits, # (1, 60, 80, 65) float32 + self.net_local_desc, # (1, 60, 80, 256) float32 + self.net_global_decs, # (1, 4096) float32 + self.local_descriptors_op,# (1, num_keypoints, 256) float32 + self.keypoints[0]], # (num_keypoints, 2) int64 + feed_dict = {self.net_image_in: image_in}) + + features = {} + features['keypoints'] = results[-1] + features['scores'] = results[0][0] + features['local_descriptors'] = results[-2] + features['global_descriptor'] = results[-3] + return features diff --git a/feature_extraction/ b/feature_extraction/ new file mode 100644 index 0000000..180b8b4 --- /dev/null +++ b/feature_extraction/ @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +import tensorflow as tf +import cv2 +import numpy as np +from openvino.inference_engine import IENetwork +from openvino.inference_engine import IEPlugin,IECore +import os +from tensorflow.python.ops import gen_nn_ops +tf.enable_eager_execution() + +default_config = { + 'cpu_extension': "/opt/intel/openvino/inference_engine/lib/intel64/", + 'model_path': 'models/hfnet_vino', + 'model_file': "hfnet.xml", + 'weights_file': "hfnet.bin", + 'keypoint_number': 500, + 'keypoint_threshold': 0.002, + 'nms_iterations': 1, + 'nms_radius': 1, +} + +class FeatureNet: + def __init__(self, config=default_config): + self.config = config + = IECore() + if os.path.exists(config['cpu_extension']): +['cpu_extension'], 'CPU') + else: + print('CPU extension file does not exist: %s' % config['cpu_extension']) + model = os.path.join(config['model_path'], config['model_file']) + weights = os.path.join(config['model_path'], config['weights_file']) + = IENetwork(model=model, weights=weights) + # Input size is specified by the OpenVINO model + input_shape =['image'].shape + self.input_size = (input_shape[3], input_shape[2]) + self.scaling_desc = (np.array(self.input_size) / 8 - 1.) / (np.array(self.input_size) - 1.) 
+ print('OpenVINO model input size: (%d, %d)' % (self.input_size[0], self.input_size[1])) + self.input_blob = next(iter( + self.out_blob = next(iter( + = 1 + self.exec_net =, device_name="CPU") + + def simple_nms(self, scores, iterations, radius): + """Performs non maximum suppression (NMS) on the heatmap using max-pooling. + This method does not suppress contiguous points that have the same score. + It is an approximation of the standard NMS and uses iterative propagation. + Arguments: + scores: the score heatmap, with shape `[B, H, W]`. + radius: an integer scalar, the radius of the NMS window. + """ + if iterations < 1: return scores + radius = tf.constant(radius, name='radius') + size = radius*2 + 1 + + max_pool = lambda x: gen_nn_ops.max_pool_v2( # supports dynamic ksize + x[..., None], ksize=[1, size, size, 1], + strides=[1, 1, 1, 1], padding='SAME')[..., 0] + zeros = tf.zeros_like(scores) + max_mask = tf.equal(scores, max_pool(scores)) + for _ in range(iterations-1): + supp_mask = tf.cast(max_pool(tf.to_float(max_mask)), tf.bool) + supp_scores = tf.where(supp_mask, zeros, scores) + new_max_mask = tf.equal(supp_scores, max_pool(supp_scores)) + max_mask = max_mask | (new_max_mask & tf.logical_not(supp_mask)) + return tf.where(max_mask, scores, zeros) + + def select_keypoints(self, scores, keypoint_number, keypoint_threshold, nms_iterations, nms_radius): + scores = self.simple_nms(scores, nms_iterations, nms_radius) + keypoints = tf.where(tf.greater_equal( + scores[0], keypoint_threshold)) + scores = tf.gather_nd(scores[0], keypoints) + k = tf.constant(keypoint_number, name='k') + k = tf.minimum(tf.shape(scores)[0], k) + scores, indices = tf.nn.top_k(scores, k) + keypoints = tf.to_int32(tf.gather( + tf.to_float(keypoints), indices)) + return np.array(keypoints), np.array(scores) + + def select_keypoints_threshold(self, scores, keypoint_threshold, scale): + keypoints = tf.where(tf.greater_equal(scores[0], self.config['keypoint_threshold'])).numpy() + keypoints = 
np.array(keypoints) + scores = np.array([scores[0, i[0], i[1]] for i in keypoints]) + return keypoints, scores + + def infer(self, image): + if len(image.shape) == 3: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + scale = [image.shape[1] / self.input_size[0], image.shape[0] / self.input_size[1]] + image_scaled = cv2.resize(image, self.input_size)[:,:,None] + image_scaled = image_scaled.transpose((2, 0, 1)) + res = self.exec_net.infer(inputs={self.input_blob: np.expand_dims(image_scaled, axis=0)}) + + features = {} + scores = res['pred/local_head/detector/Squeeze'] + if self.config['keypoint_number'] == 0 and self.config['nms_iterations'] == 0: + keypoints, features['scores'] = self.select_keypoints_threshold(scores, + self.config['keypoint_threshold'], scale) + else: + keypoints, features['scores'] = self.select_keypoints(scores, + self.config['keypoint_number'], self.config['keypoint_threshold'], + self.config['nms_iterations'], self.config['nms_radius']) + # scaling back and x-y conversion + features['keypoints'] = np.array([[int(i[1] * scale[0]), int(i[0] * scale[1])] for i in keypoints]) + + local = np.transpose(res['pred/local_head/descriptor/Conv_1/BiasAdd/Normalize'],(0,2,3,1)) + if len(features['keypoints']) > 0: + features['local_descriptors'] = \ + tf.nn.l2_normalize( + tf.contrib.resampler.resampler( + local, + tf.to_float(self.scaling_desc)[::-1]*tf.to_float(features['keypoints'][None])), + -1).numpy() + else: + features['local_descriptors'] = np.array([[]]) + + features['global_descriptor'] = res['pred/global_head/dimensionality_reduction/BiasAdd/Normalize'] + + return features diff --git a/feature_extraction/package.xml b/feature_extraction/package.xml new file mode 100644 index 0000000..60943d0 --- /dev/null +++ b/feature_extraction/package.xml @@ -0,0 +1,62 @@ + + + feature_extraction + 0.0.0 + The feature_extraction package + + + + + Xuesong Shi + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + 
rospy + rospy + rospy + image_feature_msgs + + + + + + + diff --git a/feature_extraction/ b/feature_extraction/ new file mode 100755 index 0000000..810bc37 --- /dev/null +++ b/feature_extraction/ @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +""" +A non-ROS script to visualize extracted keypoints of given images +""" + +import os +import cv2 +import numpy as np +import time +import threading +import sys + +def main(): + net_name = 'hfnet_vino' + gui = True + if net_name == 'hfnet_vino': + from hfnet_vino import FeatureNet, default_config + elif net_name == 'hfnet_tf': + from hfnet_tf import FeatureNet, default_config + else: + exit('Unknown net %s' % net_name) + config = default_config + #config['keypoint_threshold'] = 0 + net = FeatureNet(config) + filenames = sys.argv[1:] + for f in filenames: + image = cv2.imread(f) + image = cv2.resize(image, (640, 480)) + start_time = time.time() + features = net.infer(image) + end_time = time.time() + num_keypoints = features['keypoints'].shape[0] + print(f + ': ' + str(image.shape) + + ', %d keypoints, %.2f ms' % (num_keypoints, (end_time - start_time) * 1000)) + if gui: + draw_keypoints(image, features['keypoints'], features['scores']) + title = f + ' (' + net_name + ', ' + str(num_keypoints) + ' keypoints)' + cv2.imshow(title, image) + cv2.waitKey() + +def draw_keypoints(image, keypoints, scores): + upper_score = 0.2 # keypoints with this score or higher will have a red circle + lower_score = 0.002 # keypoints with this score or lower will have a white circle + scale = 1 / (upper_score - lower_score) + for p,s in zip(keypoints, scores): + s = min(max(s - lower_score, 0) * scale, 1) + color = (255 * (1 - s), 255 * (1 - s), 255) # BGR +, tuple(p), 3, color, 1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/feature_extraction/ b/feature_extraction/ new file mode 100755 index 0000000..e9a87f5 --- /dev/null +++ b/feature_extraction/ @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +""" +A non-ROS script to 
visualize extracted keypoints and their matches of given image pairs +""" + +import os +import cv2 +import numpy as np +import time +import threading +import sys + +def main(): + net_name = 'hfnet_vino' + gui = True + if net_name == 'hfnet_vino': + from hfnet_vino import FeatureNet, default_config + elif net_name == 'hfnet_tf': + from hfnet_tf import FeatureNet, default_config + else: + exit('Unknown net %s' % net_name) + config = default_config + #config['keypoint_threshold'] = 0.001 + net = FeatureNet(config) + filenames = sys.argv[1:] + file_features = {} + for f in filenames: + image = cv2.imread(f) + #image = cv2.resize(image, (640, 480)) + #cv2.imshow(f, image) + start_time = time.time() + features = net.infer(image) + end_time = time.time() + num_keypoints = features['keypoints'].shape[0] + print(f + ': ' + str(image.shape) + + ', %d keypoints, %.2f ms' % (num_keypoints, (end_time - start_time) * 1000)) + file_features[f] = features + file_features[f]['image'] = image + if gui: + draw_keypoints(image, features['keypoints'], features['scores']) + title = f + ' (' + net_name + ', ' + str(num_keypoints) + ' keypoints)' + cv2.imshow(title, image) + cv2.waitKey() + + f1 = filenames[0] + for f2 in filenames[1:]: + distance = np.linalg.norm(file_features[f1]['global_descriptor'] \ + - file_features[f2]['global_descriptor']) + des1 = list(file_features[f1]['local_descriptors']) + des2 = list(file_features[f2]['local_descriptors']) + des1 = np.squeeze(file_features[f1]['local_descriptors']) + des2 = np.squeeze(file_features[f2]['local_descriptors']) + kp1 = [cv2.KeyPoint(p[0], p[1], _size=2) for p in file_features[f1]['keypoints']] + kp2 = [cv2.KeyPoint(p[0], p[1], _size=2) for p in file_features[f2]['keypoints']] + img1 = file_features[f1]['image'] + img2 = file_features[f2]['image'] + + bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + matches = bf.match(des1, des2) + #matches = sorted(matches, key = lambda x:x.distance) + match_img = cv2.drawMatches(img1, kp1, 
img2, kp2, matches, None, flags=2) + title = os.path.splitext(os.path.basename(f1))[0] + '-' + \ + os.path.splitext(os.path.basename(f2))[0] + '-' + str(distance) + cv2.imshow(title, match_img) + cv2.imwrite(title + '.jpg', match_img) + cv2.waitKey() + +def draw_keypoints(image, keypoints, scores): + upper_score = 0.5 + lower_score = 0.1 + scale = 1 / (upper_score - lower_score) + for p,s in zip(keypoints, scores): + s = min(max(s - lower_score, 0) * scale, 1) + color = (255 * (1 - s), 255 * (1 - s), 255) # 