diff --git a/.gitignore b/.gitignore index bec010d3..00a55888 100644 --- a/.gitignore +++ b/.gitignore @@ -114,4 +114,5 @@ data/ tmp/ exp/json tmp_*/ -example/res/ \ No newline at end of file +example/res/ +data/ diff --git a/alphapose/datasets/__init__.py b/alphapose/datasets/__init__.py index 5b5b469e..6289938e 100644 --- a/alphapose/datasets/__init__.py +++ b/alphapose/datasets/__init__.py @@ -3,8 +3,24 @@ from .custom import CustomDataset from .mscoco import Mscoco from .mpii import Mpii +from .coco_wholebody import coco_wholebody +from .coco_wholebody_det import coco_wholebody_det from .halpe_26 import Halpe_26 from .halpe_136 import Halpe_136 from .halpe_136_det import Halpe_136_det from .halpe_26_det import Halpe_26_det -__all__ = ['CustomDataset', 'Halpe_136', 'Halpe_26_det', 'Halpe_136_det', 'Halpe_26', 'Mscoco', 'Mscoco_det', 'Mpii', 'ConcatDataset', 'coco_wholebody', 'coco_wholebody_det'] +from .halpe_coco_wholebody_26 import Halpe_coco_wholebody_26 +from .halpe_coco_wholebody_26_det import Halpe_coco_wholebody_26_det +from .halpe_coco_wholebody_136 import Halpe_coco_wholebody_136 +from .halpe_coco_wholebody_136_det import Halpe_coco_wholebody_136_det +from .halpe_68_noface import Halpe_68_noface +from .halpe_68_noface_det import Halpe_68_noface_det +from .single_hand import SingleHand +from .single_hand_det import SingleHand_det + +__all__ = ['CustomDataset', 'ConcatDataset', 'Mpii', 'Mscoco', 'Mscoco_det', \ + 'Halpe_26', 'Halpe_26_det', 'Halpe_136', 'Halpe_136_det', \ + 'Halpe_coco_wholebody_26', 'Halpe_coco_wholebody_26_det', \ + 'Halpe_coco_wholebody_136', 'Halpe_coco_wholebody_136_det', \ + 'Halpe_68_noface', 'Halpe_68_noface_det', 'SingleHand', 'SingleHand_det', \ + 'coco_wholebody', 'coco_wholebody_det'] diff --git a/alphapose/datasets/coco_det.py b/alphapose/datasets/coco_det.py index 187d4311..f4c1f027 100644 --- a/alphapose/datasets/coco_det.py +++ b/alphapose/datasets/coco_det.py @@ -68,10 +68,10 @@ def __getitem__(self, index): img_id = int(img_id) else: img_id = det_res['image_id'] - img_path = './data/coco/val2017/%012d.jpg' % img_id + img_path = os.path.join(self._root, 'val2017/%012d.jpg' % img_id) # Load image - image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated imght, imgwidth = image.shape[0], image.shape[1] x1, y1, w, h = det_res['bbox'] diff --git a/alphapose/datasets/coco_wholebody.py b/alphapose/datasets/coco_wholebody.py index b19a4fc0..79c71b4b 100644 --- a/alphapose/datasets/coco_wholebody.py +++ b/alphapose/datasets/coco_wholebody.py @@ -3,7 +3,7 @@ # Written by Haoyi Zhu and Hao-Shu Fang # ----------------------------------------------------- -"""Halpe Full-Body(136 points) Human keypoint dataset.""" +"""Coco WholeBody (133 points) Human keypoint dataset.""" import os import numpy as np @@ -17,7 +17,7 @@ @DATASET.register_module class coco_wholebody(CustomDataset): - """ Halpe Full-Body(136 points) Person dataset. + """ Coco WholeBody (133 points) Person dataset. 
Parameters ---------- @@ -32,17 +32,18 @@ class coco_wholebody(CustomDataset): CLASSES = ['person'] EVAL_JOINTS = list(range(133)) num_joints = 133 - CustomDataset.lower_body_ids = (11, 12, 13, 14, 15, 16, 17, 21-3, 22-3, 23-3, 24-3, 25-3) + CustomDataset.lower_body_ids = (11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22) """Joint pairs which defines the pairs of joint to be swapped when the image is flipped horizontally.""" - joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], #17 body keypoints - [20-3, 23-3], [21-3, 24-3], [22-3, 25-3], [26-3, 42-3], [27-3, 41-3], [28-3, 40-3], [29-3, 39-3], [30-3, 38-3], - [31-3, 37-3], [32-3, 36-3], [33-3, 35-3], [43-3, 52-3], [44-3, 51-3], [45-3, 50-3], [46-3, 49-3], [47-3, 48-3], - [62-3, 71-3], [63-3, 70-3], [64-3, 69-3], [65-3, 68-3], [66-3, 73-3], [67-3, 72-3], [57-3, 61-3], [58-3, 60-3], - [74-3, 80-3], [75-3, 79-3], [76-3, 78-3], [87-3, 89-3], [93-3, 91-3], [86-3, 90-3], [85-3, 81-3], [84-3, 82-3], - [94-3, 115-3], [95-3, 116-3], [96-3, 117-3], [97-3, 118-3], [98-3, 119-3], [99-3, 120-3], [100-3, 121-3], - [101-3, 122-3], [102-3, 123-3], [103-3, 124-3], [104-3, 125-3], [105-3, 126-3], [106-3, 127-3], [107-3, 128-3], - [108-3, 129-3], [109-3, 130-3], [110-3, 131-3], [111-3, 132-3], [112-3, 133-3], [113-3, 134-3], [114-3, 135-3]] + joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [17, 20], [18, 21], [19, 22], [23, 39], [24, 38], [25, 37], [26, 36], + [27, 35], [28, 34], [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], + [43, 46], [44, 45], [59, 68], [60, 67], [61, 66], [62, 65], [63, 70], + [64, 69], [54, 58], [55, 57], [71, 77], [72, 76], [73, 75], [84, 86], + [90, 88], [83, 87], [82, 78], [81, 79], [91, 112], [92, 113], [93, 114], + [94, 115], [95, 116], [96, 117], [97, 118], [98, 119], [99, 120], + [100, 121], [101, 122], [102, 123], [103, 124], [104, 125], [105, 126], + [106, 127], [107, 128], [108, 129], [109, 130], [110, 131], [111, 132]] def _load_jsons(self): @@ -62,7 +63,7 @@ def _load_jsons(self): image_ids = sorted(_coco.getImgIds()) for entry in _coco.loadImgs(image_ids): dirname, filename = entry['coco_url'].split('/')[-2:] - abs_path = os.path.join('/DATA1/Benchmark/coco', dirname, filename) + abs_path = os.path.join(self._root, dirname, filename) if not os.path.exists(abs_path): raise IOError('Image: {} not exists.'.format(abs_path)) label = self._check_load_keypoints(_coco, entry) @@ -84,11 +85,11 @@ def _check_load_keypoints(self, coco, entry): height = entry['height'] for obj in objs: - #obj['keypoints'].extend([0,0,0, 0,0,0, 0,0,0]) - obj['keypoints'].extend(obj['foot_kpts']) - obj['keypoints'].extend(obj['face_kpts']) - obj['keypoints'].extend(obj['lefthand_kpts']) - obj['keypoints'].extend(obj['righthand_kpts']) + if 'foot_kpts' in obj and 'face_kpts' in obj and 'lefthand_kpts' in obj and 'righthand_kpts' in obj: + obj['keypoints'].extend(obj['foot_kpts']) + obj['keypoints'].extend(obj['face_kpts']) + obj['keypoints'].extend(obj['lefthand_kpts']) + obj['keypoints'].extend(obj['righthand_kpts']) contiguous_cid = self.json_id_to_contiguous[obj['category_id']] if contiguous_cid >= self.num_class: # not class of interest @@ -98,8 +99,7 @@ def _check_load_keypoints(self, coco, entry): # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) # require non-zero box area - #if obj['area'] <= 0 or xmax <= xmin or ymax <= ymin: - if (xmax-xmin)*(ymax-ymin) <= 0 or 
xmax <= xmin or ymax <= ymin: + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue @@ -108,14 +108,11 @@ def _check_load_keypoints(self, coco, entry): for i in range(self.num_joints): joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] - # joints_3d[i, 2, 0] = 0 if obj['keypoints'][i * 3 + 2] >= 0.35: visible = 1 else: visible = 0 - #visible = min(1, visible) joints_3d[i, :2, 1] = visible - # joints_3d[i, 2, 1] = 0 if np.sum(joints_3d[:, 0, 1]) < 1: # no visible keypoint diff --git a/alphapose/datasets/coco_wholebody_det.py b/alphapose/datasets/coco_wholebody_det.py index 16ac2368..4a0f10ab 100644 --- a/alphapose/datasets/coco_wholebody_det.py +++ b/alphapose/datasets/coco_wholebody_det.py @@ -3,7 +3,7 @@ # Written by Haoyi Zhu # ----------------------------------------------------- -"""Haple_136 Human Detection Box dataset.""" +"""Coco WholeBody Human Detection Box dataset.""" import json import os @@ -19,7 +19,7 @@ @DATASET.register_module class coco_wholebody_det(data.Dataset): - """ Halpe_136 human detection box dataset. + """ Coco WholeBody human detection box dataset. """ EVAL_JOINTS = list(range(133)) @@ -68,10 +68,10 @@ def __getitem__(self, index): img_id = int(img_id) else: img_id = det_res['image_id'] - img_path = '/DATA1/Benchmark/coco/val2017/%012d.jpg' % img_id + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) # Load image - image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated imght, imgwidth = image.shape[1], image.shape[2] x1, y1, w, h = det_res['bbox'] @@ -92,7 +92,7 @@ def write_coco_json(self, det_file): dets = [] for entry in tqdm(_coco.loadImgs(image_ids)): abs_path = os.path.join( - '/DATA1/Benchmark/coco', self._img_prefix, entry['file_name']) + self._root, self._img_prefix, entry['file_name']) det = det_model.detect_one_img(abs_path) if det: dets += det @@ -103,11 +103,12 @@ def write_coco_json(self, det_file): def joint_pairs(self): """Joint pairs which defines the pairs of joint to be swapped when the image is flipped horizontally.""" - return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], #17 body keypoints - [20-3, 23-3], [21-3, 24-3], [22-3, 25-3], [26-3, 42-3], [27-3, 41-3], [28-3, 40-3], [29-3, 39-3], [30-3, 38-3], - [31-3, 37-3], [32-3, 36-3], [33-3, 35-3], [43-3, 52-3], [44-3, 51-3], [45-3, 50-3], [46-3, 49-3], [47-3, 48-3], - [62-3, 71-3], [63-3, 70-3], [64-3, 69-3], [65-3, 68-3], [66-3, 73-3], [67-3, 72-3], [57-3, 61-3], [58-3, 60-3], - [74-3, 80-3], [75-3, 79-3], [76-3, 78-3], [87-3, 89-3], [93-3, 91-3], [86-3, 90-3], [85-3, 81-3], [84-3, 82-3], - [94-3, 115-3], [95-3, 116-3], [96-3, 117-3], [97-3, 118-3], [98-3, 119-3], [99-3, 120-3], [100-3, 121-3], - [101-3, 122-3], [102-3, 123-3], [103-3, 124-3], [104-3, 125-3], [105-3, 126-3], [106-3, 127-3], [107-3, 128-3], - [108-3, 129-3], [109-3, 130-3], [110-3, 131-3], [111-3, 132-3], [112-3, 133-3], [113-3, 134-3], [114-3, 135-3]] + return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [17, 20], [18, 21], [19, 22], [23, 39], [24, 38], [25, 37], [26, 36], + [27, 35], [28, 34], [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], + [43, 46], [44, 45], [59, 68], [60, 67], [61, 66], [62, 65], [63, 70], + [64, 69], [54, 58], [55, 57], 
[71, 77], [72, 76], [73, 75], [84, 86], + [90, 88], [83, 87], [82, 78], [81, 79], [91, 112], [92, 113], [93, 114], + [94, 115], [95, 116], [96, 117], [97, 118], [98, 119], [99, 120], + [100, 121], [101, 122], [102, 123], [103, 124], [104, 125], [105, 126], + [106, 127], [107, 128], [108, 129], [109, 130], [110, 131], [111, 132]] diff --git a/alphapose/datasets/custom.py b/alphapose/datasets/custom.py index a55db74a..018835bd 100644 --- a/alphapose/datasets/custom.py +++ b/alphapose/datasets/custom.py @@ -1,6 +1,6 @@ # ----------------------------------------------------- # Copyright (c) Shanghai Jiao Tong University. All rights reserved. -# Written by Jiefeng Li (jeff.lee.sjtu@gmail.com), Haoyi Zhu +# Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) and Haoyi Zhu # ----------------------------------------------------- """Custum training dataset.""" @@ -22,7 +22,6 @@ class CustomDataset(data.Dataset): """Custom dataset. Annotation file must be in `coco` format. - Parameters ---------- train: bool, default is True @@ -42,16 +41,27 @@ def __init__(self, skip_empty=True, lazy_import=False, **cfg): - if os.path.exists('/home/group3/background.json'): - self.bgim = json.load(open('/home/group3/background.json','r')) - else: - self.bgim = None self._cfg = cfg self._preset_cfg = cfg['PRESET'] self._root = cfg['ROOT'] self._img_prefix = cfg['IMG_PREFIX'] - self._ann_file = os.path.join(self._root, cfg['ANN']) + self._ann_file = cfg['ANN'] + self._num_datasets = 1 + + if isinstance(self._ann_file, list): + self._num_datasets = 2 + self._root_2 = self._root[1] + self._img_prefix_2 = self._img_prefix[1] + self._ann_file_2 = self._ann_file[1] + self._root = self._root[0] + self._img_prefix = self._img_prefix[0] + self._ann_file = self._ann_file[0] + + self._ann_file = os.path.join(self._root, self._ann_file) + self._ann_file_2 = os.path.join(self._root_2, self._ann_file_2) + else: + self._ann_file = os.path.join(self._root, self._ann_file) self._lazy_import = lazy_import self._skip_empty = skip_empty @@ -97,12 +107,10 @@ def __init__(self, self._items, self._labels = self._lazy_load_json() def __getitem__(self, idx): - source = None # get image id if type(self._items[idx]) == dict: img_path = self._items[idx]['path'] img_id = self._items[idx]['id'] - source = self._items[idx]['source'] else: img_path = self._items[idx] img_id = int(os.path.splitext(os.path.basename(img_path))[0]) @@ -111,15 +119,8 @@ def __getitem__(self, idx): label = copy.deepcopy(self._labels[idx]) img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) - if self.bgim and (source == 'frei' or source == 'partX' or source == 'OneHand' or source == 'interhand'): # hand - img, label = self.hand_augmentation(img, label) - - if source == 'hand_labels_synth' or source == 'hand143_panopticdb': # hand - if not self.skip_augmentation(0.8): - img = self.motion_blur(img) - # transform ground truth into training label and apply data augmentation - img, label, label_mask, bbox = self.transformation(img, label, source) + img, label, label_mask, bbox = self.transformation(img, label) return img, label, label_mask, img_id, bbox def __len__(self): @@ -138,141 +139,19 @@ def _lazy_load_ann_file(self): return _database def _lazy_load_json(self): - if os.path.exists(self._ann_file + '_annot_keypoint.pkl') and self._lazy_import: + postfix = '_annot_keypoint.pkl' if self._num_datasets == 1 else '_plus_annot_keypoint.pkl' + if os.path.exists(self._ann_file + postfix) and self._lazy_import: print('Lazy load annot...') - with open(self._ann_file + 
'_annot_keypoint.pkl', 'rb') as fid: + with open(self._ann_file + postfix, 'rb') as fid: items, labels = pk.load(fid) else: items, labels = self._load_jsons() - if os.access(self._ann_file + '_annot_keypoint.pkl', os.W_OK): - with open(self._ann_file + '_annot_keypoint.pkl', 'wb') as fid: + if os.access(self._ann_file + postfix, os.W_OK): + with open(self._ann_file + postfix, 'wb') as fid: pk.dump((items, labels), fid, pk.HIGHEST_PROTOCOL) return items, labels - def motion_blur(self, image, degree=12, angle=45): - image = np.array(image) - - M = cv2.getRotationMatrix2D((degree / 2, degree / 2), angle, 1) - motion_blur_kernel = np.diag(np.ones(degree)) - motion_blur_kernel = cv2.warpAffine(motion_blur_kernel, M, (degree, degree)) - - motion_blur_kernel = motion_blur_kernel / degree - blurred = cv2.filter2D(image, -1, motion_blur_kernel) - - # convert to uint8 - cv2.normalize(blurred, blurred, 0, 255, cv2.NORM_MINMAX) - blurred = np.array(blurred, dtype=np.uint8) - return blurred - - def skip_augmentation(self, p): - x = np.random.rand() - if x < p: - return True - else: - return False - - def get_bgimg(self, box_h, box_w): - bgimgpath = random.choice(self.bgim) - file_name = bgimgpath['file_name'] - img_name = file_name.split('/')[-1] - img_path = '/home/group3/coco/train2017/' + img_name - img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) - img_h, img_w = img.shape[0], img.shape[1] - - if img_h <= box_h or img_w <= box_w: - img = cv2.resize(img, (int(max(img_w, (np.random.rand()*1.8+1.2) * box_w)), int(max(img_h, (np.random.rand()*1.8+1.2) * box_h)))) - img_h, img_w = img.shape[0], img.shape[1] - - # crop the img - if (img_h <= img_w) and (img_h - 4 > box_w) and (np.random.rand() > 0.25): - crop_w = int((img_h - 2 - max((box_w + 2), (img_h / 3))) * np.random.rand() + max((box_w + 2), (img_h / 3))) - start_p = (img_w - crop_w) * np.random.rand() - img = img[:, int(start_p):int(start_p + crop_w + 1), :] - assert img.shape[1] > box_w and img.shape[0] > img.shape[1], (img.shape, (box_w, box_h)) - - assert img.shape[0] > box_h and img.shape[1] > box_w, (img.shape, (box_h, box_w)) - - return img - - def hand_augmentation(self, img, label): - # some images are too big (mainly in OneHand) - if img.shape[0] > 640 or img.shape[1] > 640: - h, w, c = img.shape - resize_scale = 640 / h - img = cv2.resize(img, (int(w * resize_scale), int(h * resize_scale))) - handkp = label['joints_3d'][:,0:2,0][115:136,:] - assert handkp.shape == (21, 2) - handkp = handkp * resize_scale - label['joints_3d'][:,0:2,0][115:136,:] = handkp - label['bbox'] = list(np.array(label['bbox']) * resize_scale) - label['height'], label['width'] = img.shape[0:2] - - if not self.skip_augmentation(0.8): - img = self.motion_blur(img) - - label['bbox'] = list(label['bbox']) - # print(img_path, 'hand augmentation') - if not self.skip_augmentation(0.3): - handkp = label['joints_3d'][:,0:2,0][115:136,:] - assert handkp.shape == (21, 2) - - # resize the hand img (random scale between 40% and 100%) - resize_scale = 0.6 * np.random.rand() + 0.4 - handkp = handkp * resize_scale - img = cv2.resize(img, dsize=None, fx=resize_scale, fy=resize_scale) - label['height'], label['width'] = img.shape[0:2] - label['bbox'] = list(np.array(label['bbox']) * resize_scale) - - h, w = img.shape[0:2] - hand_xmin, hand_xmax, hand_ymin, hand_ymax = int(round(min(handkp[:,0]))), int(round(max(handkp[:,0]))),int(round(min(handkp[:,1]))), int(round(max(handkp[:,1]))) - boxw_time, boxh_time = float(np.random.rand()*2+4), float(np.random.rand()*4+5) - box_w, 
box_h = max(int((hand_xmax - hand_xmin)*boxw_time), w+1), max(int((hand_ymax - hand_ymin)*boxh_time),h+1) - - background= self.get_bgimg(box_h, box_w) - - bh, bw, bc = background.shape - # print(bw - box_w, bh - box_h) - x, y = int(np.random.randint(0,int(bw - box_w),size=1)), int(np.random.randint(0,int(bh - box_h), size=1)) - hd = copy.deepcopy(img) - new_image = copy.deepcopy(background) - ralative_x, ralative_y = int(np.random.randint(0,int(box_w-w),size=1)), int(np.random.randint(0,int(box_h-h), size=1)) - new_loc_x, new_loc_y = x + ralative_x, y + ralative_y - assert (new_loc_x+w < x+box_w) and (new_loc_y+h < y+box_h) - - handkp[(handkp[:, 0] + handkp[:, 1]) > 0] += [new_loc_x, new_loc_y] - - if new_loc_x < 0: - hd = hd[:,-new_loc_x:,:] - new_loc_x = 0 - if new_loc_y < 0: - hd = hd[-new_loc_y:,:,:] - new_loc_y = 0 - if new_loc_x+hd.shape[1]>new_image.shape[1]: - hd = hd[:, :new_image.shape[1]-new_loc_x, :] - if new_loc_y+hd.shape[0]>new_image.shape[0]: - hd = hd[:new_image.shape[0]-new_loc_y, :, :] - - new_image[new_loc_y:new_loc_y+h,new_loc_x:new_loc_x+w,:] = hd - label['bbox'][0] = label['bbox'][0] + new_loc_x - label['bbox'][1] = label['bbox'][1] + new_loc_y - label['bbox'][2] = label['bbox'][2] + new_loc_x - label['bbox'][3] = label['bbox'][3] + new_loc_y - - max_length = max(new_image.shape[0], new_image.shape[1]) - if max_length > 1200: - scale = 640 / max_length - new_image = cv2.resize(new_image, (int(round(new_image.shape[1] * scale)), int(round(new_image.shape[0] * scale)))) - handkp = handkp * scale - label['joints_3d'][:,0:2,0][115:136,:] = handkp - img = new_image - label['height'], label['width'] = img.shape[0:2] - - label['bbox'] = tuple(label['bbox']) - assert label['height'] == img.shape[0] and label['width'] == img.shape[1], (img.shape, (label['height'], label['width']), flag) - - return img, label - @abstractmethod def _load_jsons(self): pass @@ -289,4 +168,4 @@ def num_joints(self): def joint_pairs(self): """Joint pairs which defines the pairs of joint to be swapped when the image is flipped horizontally.""" - return None + return None \ No newline at end of file diff --git a/alphapose/datasets/halpe_136.py b/alphapose/datasets/halpe_136.py index a29bfbb3..534e1d9b 100644 --- a/alphapose/datasets/halpe_136.py +++ b/alphapose/datasets/halpe_136.py @@ -17,7 +17,7 @@ @DATASET.register_module class Halpe_136(CustomDataset): - """ Halpe Full-Body(136 points) Person dataset. + """ Halpe Full-Body (136 points) Person dataset. 
Parameters ---------- @@ -61,46 +61,18 @@ def _load_jsons(self): # iterate through the annotations image_ids = sorted(_coco.getImgIds()) for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) - if 'source' not in entry: # coco - dirname, filename = entry['coco_url'].split('/')[-2:] - abs_path = os.path.join('/DATA1/Benchmark/coco', dirname, filename) - if not os.path.exists(abs_path): - raise IOError('Image: {} not exists.'.format(abs_path)) - label = self._check_load_keypoints(_coco, entry) - if not label: - continue - for obj in label: - items.append(abs_path) - labels.append(obj) - else: - source = entry['source'] - if source == 'hico': - abs_path = os.path.join('/DATA1/Benchmark/hico_20160224_det/images/train2015', entry['file_name']) - elif source == '300wLP': - abs_path = os.path.join('/DATA1/Benchmark/300W_LP', entry['file_name']) - elif source == 'frei': - abs_path = os.path.join('/DATA1/Benchmark/FreiHand/training/rgb', entry['file_name']) - - if not os.path.exists(abs_path): - raise IOError('Image: {} not exists.'.format(abs_path)) - label = self._check_load_keypoints(_coco, entry) - if not label: - continue + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue - # num of items are relative to person, not image - if source == 'hico': - for i in range(6): - for obj in label: - items.append({'path': abs_path, 'id': entry['id'], 'source':source}) - labels.append(obj) - elif source == 'frei': - for obj in label: - items.append({'path': abs_path, 'id': entry['id'], 'source':source}) - labels.append(obj) - for obj in label: - items.append({'path': abs_path, 'id': entry['id'], 'source':source}) - labels.append(obj) + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id']}) + labels.append(obj) return items, labels @@ -123,8 +95,7 @@ def _check_load_keypoints(self, coco, entry): # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) # require non-zero box area - #if obj['area'] <= 0 or xmax <= xmin or ymax <= ymin: - if (xmax-xmin)*(ymax-ymin) <= 0 or xmax <= xmin or ymax <= ymin: + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue @@ -133,14 +104,11 @@ def _check_load_keypoints(self, coco, entry): for i in range(self.num_joints): joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] - # joints_3d[i, 2, 0] = 0 if obj['keypoints'][i * 3 + 2] >= 0.35: visible = 1 else: visible = 0 - #visible = min(1, visible) joints_3d[i, :2, 1] = visible - # joints_3d[i, 2, 1] = 0 if np.sum(joints_3d[:, 0, 1]) < 1: # no visible keypoint diff --git a/alphapose/datasets/halpe_136_det.py b/alphapose/datasets/halpe_136_det.py index ab3d57f8..ee2f8244 100644 --- a/alphapose/datasets/halpe_136_det.py +++ b/alphapose/datasets/halpe_136_det.py @@ -19,7 +19,7 @@ @DATASET.register_module class Halpe_136_det(data.Dataset): - """ Halpe_136 human detection box dataset. + """ Halpe Full-Body (136 keypoints) human detection box dataset. 
""" EVAL_JOINTS = list(range(136)) @@ -68,10 +68,10 @@ def __getitem__(self, index): img_id = int(img_id) else: img_id = det_res['image_id'] - img_path = '/DATA1/Benchmark/coco/val2017/%012d.jpg' % img_id + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) # Load image - image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated imght, imgwidth = image.shape[1], image.shape[2] x1, y1, w, h = det_res['bbox'] @@ -92,7 +92,7 @@ def write_coco_json(self, det_file): dets = [] for entry in tqdm(_coco.loadImgs(image_ids)): abs_path = os.path.join( - '/DATA1/Benchmark/coco', self._img_prefix, entry['file_name']) + self._root, self._img_prefix, entry['file_name']) det = det_model.detect_one_img(abs_path) if det: dets += det diff --git a/alphapose/datasets/halpe_26.py b/alphapose/datasets/halpe_26.py index 2ddb0676..74e770b1 100644 --- a/alphapose/datasets/halpe_26.py +++ b/alphapose/datasets/halpe_26.py @@ -17,7 +17,7 @@ @DATASET.register_module class Halpe_26(CustomDataset): - """ Halpe_simple 26 keypoints Person Pose dataset. + """ Halpe 26 keypoints Person Pose dataset. Parameters ---------- @@ -52,43 +52,18 @@ def _load_jsons(self): # iterate through the annotations image_ids = sorted(_coco.getImgIds()) for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) - if 'source' not in entry: # coco - dirname, filename = entry['coco_url'].split('/')[-2:] - abs_path = os.path.join('/DATA1/Benchmark/coco', dirname, filename) - if not os.path.exists(abs_path): - raise IOError('Image: {} not exists.'.format(abs_path)) - label = self._check_load_keypoints(_coco, entry) - if not label: - continue - for obj in label: - items.append(abs_path) - labels.append(obj) - else: - source = entry['source'] - if source == 'hico': - abs_path = os.path.join('/DATA1/Benchmark/hico_20160224_det/images/train2015', entry['file_name']) - elif source == '300wLP': - abs_path = os.path.join('/DATA1/Benchmark/300W_LP', entry['file_name']) - elif source == 'frei': - abs_path = os.path.join('/DATA1/Benchmark/FreiHand/training/rgb', entry['file_name']) - - if not os.path.exists(abs_path): - raise IOError('Image: {} not exists.'.format(abs_path)) - label = self._check_load_keypoints(_coco, entry) - if not label: - continue + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue - # num of items are relative to person, not image - if source == 'hico': - for i in range(6): - for obj in label: - items.append({'path': abs_path, 'id': entry['id'], 'source':source}) - labels.append(obj) - - for obj in label: - items.append({'path': abs_path, 'id': entry['id'], 'source':source}) - labels.append(obj) + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id']}) + labels.append(obj) return items, labels @@ -111,8 +86,7 @@ def _check_load_keypoints(self, coco, entry): # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) # require non-zero box area - #if obj['area'] <= 0 or xmax <= xmin or ymax <= ymin: - if (xmax-xmin)*(ymax-ymin) <= 0 or xmax <= xmin or ymax <= ymin: + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= 
xmin or ymax <= ymin: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue @@ -121,14 +95,11 @@ def _check_load_keypoints(self, coco, entry): for i in range(self.num_joints): joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] - # joints_3d[i, 2, 0] = 0 if obj['keypoints'][i * 3 + 2] >= 0.35: visible = 1 else: visible = 0 - #visible = min(1, visible) joints_3d[i, :2, 1] = visible - # joints_3d[i, 2, 1] = 0 if np.sum(joints_3d[:, 0, 1]) < 1: # no visible keypoint diff --git a/alphapose/datasets/halpe_26_det.py b/alphapose/datasets/halpe_26_det.py index e5e109b3..efe7f29a 100644 --- a/alphapose/datasets/halpe_26_det.py +++ b/alphapose/datasets/halpe_26_det.py @@ -19,7 +19,7 @@ @DATASET.register_module class Halpe_26_det(data.Dataset): - """ Halpe_26 human detection box dataset. + """ Halpe 26 human detection box dataset. """ EVAL_JOINTS = list(range(26)) @@ -68,10 +68,10 @@ def __getitem__(self, index): img_id = int(img_id) else: img_id = det_res['image_id'] - img_path = '/DATA1/Benchmark/coco/val2017/%012d.jpg' % img_id + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) # Load image - image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated imght, imgwidth = image.shape[1], image.shape[2] x1, y1, w, h = det_res['bbox'] @@ -92,7 +92,7 @@ def write_coco_json(self, det_file): dets = [] for entry in tqdm(_coco.loadImgs(image_ids)): abs_path = os.path.join( - '/DATA1/Benchmark/coco', self._img_prefix, entry['file_name']) + self._root, self._img_prefix, entry['file_name']) det = det_model.detect_one_img(abs_path) if det: dets += det diff --git a/alphapose/datasets/halpe_68_noface.py b/alphapose/datasets/halpe_68_noface.py new file mode 100644 index 00000000..a1dde385 --- /dev/null +++ b/alphapose/datasets/halpe_68_noface.py @@ -0,0 +1,155 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by Haoyi Zhu +# ----------------------------------------------------- + +"""Halpe without face (68 keypoints) Human keypoint dataset.""" +import os + +import numpy as np +from tkinter import _flatten + +from alphapose.models.builder import DATASET +from alphapose.utils.bbox import bbox_clip_xyxy, bbox_xywh_to_xyxy + +from .custom import CustomDataset + + +@DATASET.register_module +class Halpe_68_noface(CustomDataset): + """ Halpe Full-Body without face (68 keypoints) Person dataset. + + Parameters + ---------- + train: bool, default is True + If true, will set as training mode. + skip_empty: bool, default is False + Whether skip entire image if no valid label is found. Use `False` if this dataset is + for validation to avoid COCO metric error. + dpg: bool, default is False + If true, will activate `dpg` for data augmentation. 
+ """ + CLASSES = ['person'] + EVAL_JOINTS = list(range(68)) + num_joints = 68 + CustomDataset.lower_body_ids = (11, 12, 13, 14, 15, 16, 20, 21, 22, 23, 24, 25) + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [20, 21], [22, 23], [24, 25], [26, 47], [27, 48], [28, 49], [29, 50], [30, 51], + [31, 52], [32, 53], [33, 54], [34, 55], [35, 56], [36, 57], [37, 58], [38, 59], + [39, 60], [40, 61], [41, 62], [42, 63], [43, 64], [44, 65], [45, 66], [46, 67]] + + + def _load_jsons(self): + """Load all image paths and labels from JSON annotation files into buffer.""" + items = [] + labels = [] + + _coco = self._lazy_load_ann_file() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. " + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) + + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue + + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id']}) + labels.append(obj) + + return items, labels + + def _check_load_keypoints(self, coco, entry): + """Check and load ground-truth keypoints""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + obj['keypoints'] = obj['keypoints'][:26*3] + obj['keypoints'][-42*3:] + + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound + if 'bbox' not in obj: + obj['bbox'] = [1, 1, width-1, height-1] + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + + # require non-zero box area + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32) + for i in range(self.num_joints): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.25: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + 
valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _get_box_center_area(self, bbox): + """Get bbox center""" + c = np.array([(bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0]) + area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) + return c, area + + def _get_keypoints_center_count(self, keypoints): + """Get geometric center of all keypoints""" + keypoint_x = np.sum(keypoints[:, 0, 0] * (keypoints[:, 0, 1] > 0)) + keypoint_y = np.sum(keypoints[:, 1, 0] * (keypoints[:, 1, 1] > 0)) + num = float(np.sum(keypoints[:, 0, 1])) + return np.array([keypoint_x / num, keypoint_y / num]), num diff --git a/alphapose/datasets/halpe_68_noface_det.py b/alphapose/datasets/halpe_68_noface_det.py new file mode 100644 index 00000000..df460593 --- /dev/null +++ b/alphapose/datasets/halpe_68_noface_det.py @@ -0,0 +1,109 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by HaoyiZhu +# ----------------------------------------------------- + +"""Halpe without face (68 keypoints) Human Detection Box dataset.""" +import json +import os + +import cv2 +import torch +import torch.utils.data as data +from tqdm import tqdm + +from alphapose.utils.presets import SimpleTransform +from detector.apis import get_detector +from alphapose.models.builder import DATASET + + +@DATASET.register_module +class Halpe_68_noface_det(data.Dataset): + """ Halpe Full-Body without face (68 keypoints) human detection box dataset. + + """ + EVAL_JOINTS = list(range(68)) + + def __init__(self, + det_file=None, + opt=None, + **cfg): + + self._cfg = cfg + self._opt = opt + self._preset_cfg = cfg['PRESET'] + self._root = cfg['ROOT'] + self._img_prefix = cfg['IMG_PREFIX'] + if not det_file: + det_file = cfg['DET_FILE'] + self._ann_file = os.path.join(self._root, cfg['ANN']) + + if os.path.exists(det_file): + print("Detection results exist, will use it") + else: + print("Will create detection results to {}".format(det_file)) + self.write_coco_json(det_file) + + assert os.path.exists(det_file), "Error: no detection results found" + with open(det_file, 'r') as fid: + self._det_json = json.load(fid) + + self._input_size = self._preset_cfg['IMAGE_SIZE'] + self._output_size = self._preset_cfg['HEATMAP_SIZE'] + + self._sigma = self._preset_cfg['SIGMA'] + + if self._preset_cfg['TYPE'] == 'simple': + self.transformation = SimpleTransform( + self, scale_factor=0, + input_size=self._input_size, + output_size=self._output_size, + rot=0, sigma=self._sigma, + train=False, add_dpg=False) + + def __getitem__(self, index): + det_res = self._det_json[index] + if not isinstance(det_res['image_id'], int): + img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) + img_id = int(img_id) + else: + img_id = det_res['image_id'] + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) + + # Load image + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated + + imght, imgwidth = image.shape[1], image.shape[2] + x1, y1, w, h = det_res['bbox'] + bbox = [x1, y1, x1 + w, y1 + h] + inp, bbox = self.transformation.test_transform(image, bbox) + return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) + + def __len__(self): + return 
len(self._det_json) + + def write_coco_json(self, det_file): + from pycocotools.coco import COCO + import pathlib + + _coco = COCO(self._ann_file) + image_ids = sorted(_coco.getImgIds()) + det_model = get_detector(self._opt) + dets = [] + for entry in tqdm(_coco.loadImgs(image_ids)): + abs_path = os.path.join( + self._root, self._img_prefix, entry['file_name']) + det = det_model.detect_one_img(abs_path) + if det: + dets += det + pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) + json.dump(dets, open(det_file, 'w')) + + @property + def joint_pairs(self): + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [20, 21], [22, 23], [24, 25], [26, 47], [27, 48], [28, 49], [29, 50], [30, 51], + [31, 52], [32, 53], [33, 54], [34, 55], [35, 56], [36, 57], [37, 58], [38, 59], + [39, 60], [40, 61], [41, 62], [42, 63], [43, 64], [44, 65], [45, 66], [46, 67]] \ No newline at end of file diff --git a/alphapose/datasets/halpe_coco_wholebody_136.py b/alphapose/datasets/halpe_coco_wholebody_136.py new file mode 100644 index 00000000..d1c61d5e --- /dev/null +++ b/alphapose/datasets/halpe_coco_wholebody_136.py @@ -0,0 +1,260 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by Haoyi Zhu and Hao-Shu Fang +# ----------------------------------------------------- + +"""Halpe Full-Body plus coco wholebody (136 points) Human keypoint dataset.""" +import os +import pickle as pk + +import numpy as np +from tkinter import _flatten +from pycocotools.coco import COCO + +from alphapose.models.builder import DATASET +from alphapose.utils.bbox import bbox_clip_xyxy, bbox_xywh_to_xyxy + +from .custom import CustomDataset + + +@DATASET.register_module +class Halpe_coco_wholebody_136(CustomDataset): + """ Halpe Full-Body plus coco wholebody (136 points) Person dataset. + + Parameters + ---------- + train: bool, default is True + If true, will set as training mode. + skip_empty: bool, default is False + Whether skip entire image if no valid label is found. Use `False` if this dataset is + for validation to avoid COCO metric error. + dpg: bool, default is False + If true, will activate `dpg` for data augmentation. 
+ """ + CLASSES = ['person'] + EVAL_JOINTS = list(range(136)) + num_joints = 136 + CustomDataset.lower_body_ids = (11, 12, 13, 14, 15, 16, 20, 21, 22, 23, 24, 25) + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], #17 body keypoints + [20, 21], [22, 23], [24, 25], [26, 42], [27, 41], [28, 40], [29, 39], [30, 38], + [31, 37], [32, 36], [33, 35], [43, 52], [44, 51], [45, 50],[46, 49], [47, 48], + [62, 71], [63, 70], [64, 69], [65, 68], [66, 73], [67, 72], [57, 61], [58, 60], + [74, 80], [75, 79], [76, 78], [87, 89], [93, 91], [86, 90], [85, 81], [84, 82], + [94, 115], [95, 116], [96, 117], [97, 118], [98, 119], [99, 120], [100, 121], + [101, 122], [102, 123], [103, 124], [104, 125], [105, 126], [106, 127], [107, 128], + [108, 129], [109, 130], [110, 131], [111, 132], [112, 133], [113, 134], [114, 135]] + + def _lazy_load_ann_file_2(self): + if os.path.exists(self._ann_file_2 + '.pkl') and self._lazy_import: + print('Lazy load json...') + with open(self._ann_file_2 + '.pkl', 'rb') as fid: + return pk.load(fid) + else: + _database = COCO(self._ann_file_2) + if os.access(self._ann_file_2 + '.pkl', os.W_OK): + with open(self._ann_file_2 + '.pkl', 'wb') as fid: + pk.dump(_database, fid, pk.HIGHEST_PROTOCOL) + return _database + + def _load_jsons(self): + """Load all image paths and labels from JSON annotation files into buffer.""" + items = [] + labels = [] + + # Halpe Fullbody + _coco = self._lazy_load_ann_file() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. " + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) + + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue + + for i in range(6): + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id'] + 600000}) # to avoid id conflict with coco wholebody + labels.append(obj) + + # coco wholebody + _coco = self._lazy_load_ann_file_2() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. 
" + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + dirname, filename = entry['coco_url'].split('/')[-2:] + abs_path = os.path.join(self._root_2, dirname, filename) + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints_2(_coco, entry) + if not label: + continue + for obj in label: + items.append(abs_path) + labels.append(obj) + + + return items, labels + + def _check_load_keypoints(self, coco, entry): + """Check and load ground-truth keypoints for Halpe FullBody""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + # require non-zero box area + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32) + for i in range(self.num_joints): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.35: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _check_load_keypoints_2(self, coco, entry): + """Check and load ground-truth keypoints for coco wholebody""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + if 'foot_kpts' in obj and 'face_kpts' in obj and 'lefthand_kpts' in obj and 'righthand_kpts' in obj: + obj['keypoints'].extend([0] * 9) # coco wholebody has only 133 kpts + obj['keypoints'].extend(obj['foot_kpts']) + obj['keypoints'].extend(obj['face_kpts']) + obj['keypoints'].extend(obj['lefthand_kpts']) + obj['keypoints'].extend(obj['righthand_kpts']) + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to 
(xmin, ymin, xmax, ymax) and clip bound + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + # require non-zero box area + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32) + for i in range(self.num_joints): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.35: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _get_box_center_area(self, bbox): + """Get bbox center""" + c = np.array([(bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0]) + area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) + return c, area + + def _get_keypoints_center_count(self, keypoints): + """Get geometric center of all keypoints""" + keypoint_x = np.sum(keypoints[:, 0, 0] * (keypoints[:, 0, 1] > 0)) + keypoint_y = np.sum(keypoints[:, 1, 0] * (keypoints[:, 1, 1] > 0)) + num = float(np.sum(keypoints[:, 0, 1])) + return np.array([keypoint_x / num, keypoint_y / num]), num diff --git a/alphapose/datasets/halpe_coco_wholebody_136_det.py b/alphapose/datasets/halpe_coco_wholebody_136_det.py new file mode 100644 index 00000000..c1acef28 --- /dev/null +++ b/alphapose/datasets/halpe_coco_wholebody_136_det.py @@ -0,0 +1,113 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by Haoyi Zhu +# ----------------------------------------------------- + +"""Halpe Full-Body plus coco wholebody (136 points) Human Detection Box dataset.""" +import json +import os + +import cv2 +import torch +import torch.utils.data as data +from tqdm import tqdm + +from alphapose.utils.presets import SimpleTransform +from detector.apis import get_detector +from alphapose.models.builder import DATASET + + +@DATASET.register_module +class Halpe_coco_wholebody_136_det(data.Dataset): + """ Halpe Full-Body plus coco wholebody (136 points) human detection box dataset. 
+ + """ + EVAL_JOINTS = list(range(136)) + + def __init__(self, + det_file=None, + opt=None, + **cfg): + + self._cfg = cfg + self._opt = opt + self._preset_cfg = cfg['PRESET'] + self._root = cfg['ROOT'] + self._img_prefix = cfg['IMG_PREFIX'] + if not det_file: + det_file = cfg['DET_FILE'] + self._ann_file = os.path.join(self._root, cfg['ANN']) + + if os.path.exists(det_file): + print("Detection results exist, will use it") + else: + print("Will create detection results to {}".format(det_file)) + self.write_coco_json(det_file) + + assert os.path.exists(det_file), "Error: no detection results found" + with open(det_file, 'r') as fid: + self._det_json = json.load(fid) + + self._input_size = self._preset_cfg['IMAGE_SIZE'] + self._output_size = self._preset_cfg['HEATMAP_SIZE'] + + self._sigma = self._preset_cfg['SIGMA'] + + if self._preset_cfg['TYPE'] == 'simple': + self.transformation = SimpleTransform( + self, scale_factor=0, + input_size=self._input_size, + output_size=self._output_size, + rot=0, sigma=self._sigma, + train=False, add_dpg=False) + + def __getitem__(self, index): + det_res = self._det_json[index] + if not isinstance(det_res['image_id'], int): + img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) + img_id = int(img_id) + else: + img_id = det_res['image_id'] + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) + + # Load image + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated + + imght, imgwidth = image.shape[1], image.shape[2] + x1, y1, w, h = det_res['bbox'] + bbox = [x1, y1, x1 + w, y1 + h] + inp, bbox = self.transformation.test_transform(image, bbox) + return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) + + def __len__(self): + return len(self._det_json) + + def write_coco_json(self, det_file): + from pycocotools.coco import COCO + import pathlib + + _coco = COCO(self._ann_file) + image_ids = sorted(_coco.getImgIds()) + det_model = get_detector(self._opt) + dets = [] + for entry in tqdm(_coco.loadImgs(image_ids)): + abs_path = os.path.join( + self._root, self._img_prefix, entry['file_name']) + det = det_model.detect_one_img(abs_path) + if det: + dets += det + pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) + json.dump(dets, open(det_file, 'w')) + + @property + def joint_pairs(self): + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + return[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [20, 21], [22, 23], [24, 25], [26, 42], [27, 41], [28, 40], [29, 39], [30, 38], + [31, 37], [32, 36], [33, 35], [43, 52], [44, 51], [45, 50],[46, 49], [47, 48], + [62, 71], [63, 70], [64, 69], [65, 68], [66, 73], [67, 72], [57, 61], [58, 60], + [74, 80], [75, 79], [76, 78], [87, 89], [93, 91], [86, 90], [85, 81], [84, 82], + [94, 115], [95, 116], [96, 117], [97, 118], [98, 119], [99, 120], [100, 121], + [101, 122], [102, 123], [103, 124], [104, 125], [105, 126], [106, 127], [107, 128], + [108, 129], [109, 130], [110, 131], [111, 132], [112, 133], [113, 134], [114, 135]] diff --git a/alphapose/datasets/halpe_coco_wholebody_26.py b/alphapose/datasets/halpe_coco_wholebody_26.py new file mode 100644 index 00000000..f2606da8 --- /dev/null +++ b/alphapose/datasets/halpe_coco_wholebody_26.py @@ -0,0 +1,255 @@ +# 
----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by Haoyi Zhu and Hao-Shu Fang +# ----------------------------------------------------- + +"""Halpe plus coco wholebody (26 keypoints) Human keypoint dataset.""" +import os +import pickle as pk + +import numpy as np +from tkinter import _flatten +from pycocotools.coco import COCO + +from alphapose.models.builder import DATASET +from alphapose.utils.bbox import bbox_clip_xyxy, bbox_xywh_to_xyxy + +from .custom import CustomDataset + + +@DATASET.register_module +class Halpe_coco_wholebody_26(CustomDataset): + """ Halpe Full-Body plus coco wholebody (26 keypoints) Person Pose dataset. + + Parameters + ---------- + train: bool, default is True + If true, will set as training mode. + skip_empty: bool, default is False + Whether skip entire image if no valid label is found. Use `False` if this dataset is + for validation to avoid COCO metric error. + dpg: bool, default is False + If true, will activate `dpg` for data augmentation. + """ + CLASSES = ['person'] + EVAL_JOINTS = list(range(26)) + num_joints = 26 + CustomDataset.lower_body_ids = (11, 12, 13, 14, 15, 16, 20, 21, 22, 23, 24, 25) + joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [20, 21], [22, 23], [24, 25]] + + def _lazy_load_ann_file_2(self): + if os.path.exists(self._ann_file_2 + '.pkl') and self._lazy_import: + print('Lazy load json...') + with open(self._ann_file_2 + '.pkl', 'rb') as fid: + return pk.load(fid) + else: + _database = COCO(self._ann_file_2) + if os.access(self._ann_file_2 + '.pkl', os.W_OK): + with open(self._ann_file_2 + '.pkl', 'wb') as fid: + pk.dump(_database, fid, pk.HIGHEST_PROTOCOL) + return _database + + def _load_jsons(self): + """Load all image paths and labels from JSON annotation files into buffer.""" + items = [] + labels = [] + + # Halpe Fullbody + _coco = self._lazy_load_ann_file() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. " + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) + + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue + + for i in range(6): + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id'] + 600000}) # to avoid id conflict with coco wholebody + labels.append(obj) + + # coco wholebody + _coco = self._lazy_load_ann_file_2() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. 
" + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + dirname, filename = entry['coco_url'].split('/')[-2:] + abs_path = os.path.join(self._root_2, dirname, filename) + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints_2(_coco, entry) + if not label: + continue + for obj in label: + items.append(abs_path) + labels.append(obj) + + return items, labels + + def _check_load_keypoints(self, coco, entry): + """Check and load ground-truth keypoints""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + obj['keypoints'] = obj['keypoints'][:self.num_joints * 3] + + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + # require non-zero box area + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32) + for i in range(self.num_joints): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.35: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _check_load_keypoints_2(self, coco, entry): + """Check and load ground-truth keypoints for coco wholebody""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + if 'foot_kpts' in obj and 'face_kpts' in obj and 'lefthand_kpts' in obj and 'righthand_kpts' in obj: + obj['keypoints'].extend([0] * 9) # coco wholebody has only 133 kpts + obj['keypoints'].extend(obj['foot_kpts']) + obj['keypoints'].extend(obj['face_kpts']) + obj['keypoints'].extend(obj['lefthand_kpts']) + obj['keypoints'].extend(obj['righthand_kpts']) + obj['keypoints'] = obj['keypoints'][:self.num_joints * 3] + + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class 
of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + # require non-zero box area + if (xmax - xmin) * (ymax - ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32) + for i in range(self.num_joints): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.35: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _get_box_center_area(self, bbox): + """Get bbox center""" + c = np.array([(bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0]) + area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) + return c, area + + def _get_keypoints_center_count(self, keypoints): + """Get geometric center of all keypoints""" + keypoint_x = np.sum(keypoints[:, 0, 0] * (keypoints[:, 0, 1] > 0)) + keypoint_y = np.sum(keypoints[:, 1, 0] * (keypoints[:, 1, 1] > 0)) + num = float(np.sum(keypoints[:, 0, 1])) + return np.array([keypoint_x / num, keypoint_y / num]), num diff --git a/alphapose/datasets/halpe_coco_wholebody_26_det.py b/alphapose/datasets/halpe_coco_wholebody_26_det.py new file mode 100644 index 00000000..84442fd7 --- /dev/null +++ b/alphapose/datasets/halpe_coco_wholebody_26_det.py @@ -0,0 +1,107 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by HaoyiZhu +# ----------------------------------------------------- + +"""Halpe plus coco wholebody (26 keypoints) Human Detection Box dataset.""" +import json +import os + +import cv2 +import torch +import torch.utils.data as data +from tqdm import tqdm + +from alphapose.utils.presets import SimpleTransform +from detector.apis import get_detector +from alphapose.models.builder import DATASET + + +@DATASET.register_module +class Halpe_coco_wholebody_26_det(data.Dataset): + """ Halpe plus coco wholebody (26 keypoints) human detection box dataset. 
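+
+    Parameters
+    ----------
+    det_file: str, default is None
+        Path to the human detection result json. Falls back to ``cfg['DET_FILE']``; if that
+        file does not exist, detections are generated with the detector built from ``opt``.
+    opt: argparse.Namespace, default is None
+        Runtime options, only needed to build the detector when no detection file exists.
+    cfg: dict
+        Dataset config providing ``PRESET``, ``ROOT``, ``IMG_PREFIX``, ``ANN`` and ``DET_FILE``.
+
+    Example
+    -------
+    A minimal sketch of how this loader can be constructed; ``cfg`` and ``opt`` are assumed
+    to be a parsed experiment config and command-line options (illustrative names only)::
+
+        dataset = Halpe_coco_wholebody_26_det(
+            opt=opt, PRESET=cfg.DATA_PRESET, **cfg.DATASET.TEST)
+        inp, cropped_box, ori_box, img_id, score, imght, imgwidth = dataset[0]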
+ + """ + EVAL_JOINTS = list(range(26)) + + def __init__(self, + det_file=None, + opt=None, + **cfg): + + self._cfg = cfg + self._opt = opt + self._preset_cfg = cfg['PRESET'] + self._root = cfg['ROOT'] + self._img_prefix = cfg['IMG_PREFIX'] + if not det_file: + det_file = cfg['DET_FILE'] + self._ann_file = os.path.join(self._root, cfg['ANN']) + + if os.path.exists(det_file): + print("Detection results exist, will use it") + else: + print("Will create detection results to {}".format(det_file)) + self.write_coco_json(det_file) + + assert os.path.exists(det_file), "Error: no detection results found" + with open(det_file, 'r') as fid: + self._det_json = json.load(fid) + + self._input_size = self._preset_cfg['IMAGE_SIZE'] + self._output_size = self._preset_cfg['HEATMAP_SIZE'] + + self._sigma = self._preset_cfg['SIGMA'] + + if self._preset_cfg['TYPE'] == 'simple': + self.transformation = SimpleTransform( + self, scale_factor=0, + input_size=self._input_size, + output_size=self._output_size, + rot=0, sigma=self._sigma, + train=False, add_dpg=False) + + def __getitem__(self, index): + det_res = self._det_json[index] + if not isinstance(det_res['image_id'], int): + img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) + img_id = int(img_id) + else: + img_id = det_res['image_id'] + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) + + # Load image + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated + + imght, imgwidth = image.shape[1], image.shape[2] + x1, y1, w, h = det_res['bbox'] + bbox = [x1, y1, x1 + w, y1 + h] + inp, bbox = self.transformation.test_transform(image, bbox) + return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) + + def __len__(self): + return len(self._det_json) + + def write_coco_json(self, det_file): + from pycocotools.coco import COCO + import pathlib + + _coco = COCO(self._ann_file) + image_ids = sorted(_coco.getImgIds()) + det_model = get_detector(self._opt) + dets = [] + for entry in tqdm(_coco.loadImgs(image_ids)): + abs_path = os.path.join( + self._root, self._img_prefix, entry['file_name']) + det = det_model.detect_one_img(abs_path) + if det: + dets += det + pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) + json.dump(dets, open(det_file, 'w')) + + @property + def joint_pairs(self): + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + return[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], + [20, 21], [22, 23], [24, 25]] diff --git a/alphapose/datasets/single_hand.py b/alphapose/datasets/single_hand.py new file mode 100644 index 00000000..2ef7bfc4 --- /dev/null +++ b/alphapose/datasets/single_hand.py @@ -0,0 +1,192 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by Haoyi Zhu and Hao-Shu Fang +# ----------------------------------------------------- + +"""Single Hand (21 keypoints) dataset.""" +import os + +import numpy as np +from tkinter import _flatten + +from alphapose.models.builder import DATASET +from alphapose.utils.bbox import bbox_clip_xyxy, bbox_xywh_to_xyxy + +from .custom import CustomDataset + + +@DATASET.register_module +class SingleHand(CustomDataset): + """ Single Hand (21 keypoints) Person dataset. 
+ + Parameters + ---------- + train: bool, default is True + If true, will set as training mode. + skip_empty: bool, default is False + Whether skip entire image if no valid label is found. Use `False` if this dataset is + for validation to avoid COCO metric error. + dpg: bool, default is False + If true, will activate `dpg` for data augmentation. + """ + CLASSES = ['person'] + EVAL_JOINTS = list(range(21)) + num_joints = 21 + CustomDataset.lower_body_ids = () + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + joint_pairs = [] + + def _load_jsons(self): + """Load all image paths and labels from JSON annotation files into buffer.""" + items = [] + labels = [] + + _coco = self._lazy_load_ann_file() + + classes = [c['name'] for c in _coco.loadCats(_coco.getCatIds())] + assert classes == self.CLASSES, "Incompatible category names with COCO. " + + self.json_id_to_contiguous = { + v: k for k, v in enumerate(_coco.getCatIds())} + + # iterate through the annotations + image_ids = sorted(_coco.getImgIds()) + for entry in _coco.loadImgs(image_ids): + abs_path = os.path.join(self._root, self._img_prefix, entry['file_name']) + + if not os.path.exists(abs_path): + raise IOError('Image: {} not exists.'.format(abs_path)) + label = self._check_load_keypoints(_coco, entry) + if not label: + continue + + # num of items are relative to person, not image + for obj in label: + items.append({'path': abs_path, 'id': entry['id']}) + labels.append(obj) + + return items, labels + + def _check_load_keypoints(self, coco, entry): + """Check and load ground-truth keypoints""" + ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False) + objs = coco.loadAnns(ann_ids) + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + + for obj in objs: + obj['keypoints'] = obj['keypoints'][-42*3:] + contiguous_cid = self.json_id_to_contiguous[obj['category_id']] + if contiguous_cid >= self.num_class: + # not class of interest + continue + if max(obj['keypoints']) == 0: + continue + # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound + if 'bbox' not in obj: + obj['bbox'] = [1, 1, width-1, height-1] + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + + # require non-zero box area + if (xmax-xmin)*(ymax-ymin) <= 0 or xmax <= xmin or ymax <= ymin: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + + # joints 3d: (num_joints, 3, 2); 3 is for x, y, z; 2 is for position, visibility + joints_3d = np.zeros((self.num_joints * 2, 3, 2), dtype=np.float32) + for i in range(self.num_joints * 2): + joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0] + joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1] + if obj['keypoints'][i * 3 + 2] >= 0.35 and obj['keypoints'][i * 3 + 0] > 0 and obj['keypoints'][i * 3 + 1] > 0: + visible = 1 + else: + visible = 0 + joints_3d[i, :2, 1] = visible + + if np.sum(joints_3d[:, 0, 1]) < 1: + # no visible keypoint + continue + + # left hand + if np.sum(joints_3d[:21, 0, 1]) >= 10: + xmin = np.min(joints_3d[:21, 0, 0][joints_3d[:21, 0, 0] > 0]) + ymin = np.min(joints_3d[:21, 1, 0][joints_3d[:21, 1, 0] > 0]) + xmax = np.max(joints_3d[:21, 0, 0][joints_3d[:21, 0, 0] > 0]) + ymax = np.max(joints_3d[:21, 1, 0][joints_3d[:21, 1, 0] > 0]) + w = xmax - xmin + h = ymax - ymin + xmin = max(xmin - np.random.rand() * w / 2, 1) + xmax = min(xmax + np.random.rand() * w / 2, width) + ymin = max(ymin - np.random.rand() * h / 2, 1) + ymax = min(ymax + 
np.random.rand() * h / 2, height) + obj['bbox'] = [xmin, ymin, xmax - xmin, ymax - ymin] + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d[:21, :, :]) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d[:21, :, :] + }) + + # right hand + if np.sum(joints_3d[21:, 0, 1]) >= 10: + xmin = np.min(joints_3d[21:, 0, 0][joints_3d[21:, 0, 0] > 0]) + ymin = np.min(joints_3d[21:, 1, 0][joints_3d[21:, 1, 0] > 0]) + xmax = np.max(joints_3d[21:, 0, 0][joints_3d[21:, 0, 0] > 0]) + ymax = np.max(joints_3d[21:, 1, 0][joints_3d[21:, 1, 0] > 0]) + w = xmax - xmin + h = ymax - ymin + xmin = max(xmin - np.random.rand() * w / 2, 1) + xmax = min(xmax + np.random.rand() * w / 2, width) + ymin = max(ymin - np.random.rand() * h / 2, 1) + ymax = min(ymax + np.random.rand() * h / 2, height) + obj['bbox'] = [xmin, ymin, xmax - xmin, ymax - ymin] + + if self._check_centers and self._train: + bbox_center, bbox_area = self._get_box_center_area((xmin, ymin, xmax, ymax)) + kp_center, num_vis = self._get_keypoints_center_count(joints_3d[21:, :, :]) + ks = np.exp(-2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area) + if (num_vis / 80.0 + 47 / 80.0) > ks: + continue + + valid_objs.append({ + 'bbox': (xmin, ymin, xmax, ymax), + 'width': width, + 'height': height, + 'joints_3d': joints_3d[21:, :, :] + }) + + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append({ + 'bbox': np.array([-1, -1, 0, 0]), + 'width': width, + 'height': height, + 'joints_3d': np.zeros((self.num_joints, 2, 2), dtype=np.float32) + }) + return valid_objs + + def _get_box_center_area(self, bbox): + """Get bbox center""" + c = np.array([(bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0]) + area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) + return c, area + + def _get_keypoints_center_count(self, keypoints): + """Get geometric center of all keypoints""" + keypoint_x = np.sum(keypoints[:, 0, 0] * (keypoints[:, 0, 1] > 0)) + keypoint_y = np.sum(keypoints[:, 1, 0] * (keypoints[:, 1, 1] > 0)) + num = float(np.sum(keypoints[:, 0, 1])) + return np.array([keypoint_x / num, keypoint_y / num]), num diff --git a/alphapose/datasets/single_hand_det.py b/alphapose/datasets/single_hand_det.py new file mode 100644 index 00000000..c9e470a6 --- /dev/null +++ b/alphapose/datasets/single_hand_det.py @@ -0,0 +1,106 @@ +# ----------------------------------------------------- +# Copyright (c) Shanghai Jiao Tong University. All rights reserved. +# Written by HaoyiZhu +# ----------------------------------------------------- + +"""Single Hand (21 keypoints) Detection Box dataset.""" +import json +import os + +import cv2 +import torch +import torch.utils.data as data +from tqdm import tqdm + +from alphapose.utils.presets import SimpleTransform +from detector.apis import get_detector +from alphapose.models.builder import DATASET + + +@DATASET.register_module +class SingleHand_det(data.Dataset): + """ Single Hand (21 keypoints) detection box dataset. 
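+
+    Parameters
+    ----------
+    det_file: str, default is None
+        Path to the detection result json. Falls back to ``cfg['DET_FILE']``; if that file
+        does not exist, detections are generated with the detector built from ``opt``.
+    opt: argparse.Namespace, default is None
+        Runtime options, only needed to build the detector when no detection file exists.
+    cfg: dict
+        Dataset config providing ``PRESET``, ``ROOT``, ``IMG_PREFIX``, ``ANN`` and ``DET_FILE``.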
+ + """ + EVAL_JOINTS = list(range(21)) + + def __init__(self, + det_file=None, + opt=None, + **cfg): + + self._cfg = cfg + self._opt = opt + self._preset_cfg = cfg['PRESET'] + self._root = cfg['ROOT'] + self._img_prefix = cfg['IMG_PREFIX'] + if not det_file: + det_file = cfg['DET_FILE'] + self._ann_file = os.path.join(self._root, cfg['ANN']) + + if os.path.exists(det_file): + print("Detection results exist, will use it") + else: + print("Will create detection results to {}".format(det_file)) + self.write_coco_json(det_file) + + assert os.path.exists(det_file), "Error: no detection results found" + with open(det_file, 'r') as fid: + self._det_json = json.load(fid) + + self._input_size = self._preset_cfg['IMAGE_SIZE'] + self._output_size = self._preset_cfg['HEATMAP_SIZE'] + + self._sigma = self._preset_cfg['SIGMA'] + + if self._preset_cfg['TYPE'] == 'simple': + self.transformation = SimpleTransform( + self, scale_factor=0, + input_size=self._input_size, + output_size=self._output_size, + rot=0, sigma=self._sigma, + train=False, add_dpg=False) + + def __getitem__(self, index): + det_res = self._det_json[index] + if not isinstance(det_res['image_id'], int): + img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) + img_id = int(img_id) + else: + img_id = det_res['image_id'] + img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id) + + # Load image + image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # scipy.misc.imread(img_path, mode='RGB') is deprecated + + imght, imgwidth = image.shape[1], image.shape[2] + x1, y1, w, h = det_res['bbox'] + bbox = [x1, y1, x1 + w, y1 + h] + inp, bbox = self.transformation.test_transform(image, bbox) + return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) + + def __len__(self): + return len(self._det_json) + + def write_coco_json(self, det_file): + from pycocotools.coco import COCO + import pathlib + + _coco = COCO(self._ann_file) + image_ids = sorted(_coco.getImgIds()) + det_model = get_detector(self._opt) + dets = [] + for entry in tqdm(_coco.loadImgs(image_ids)): + abs_path = os.path.join( + self._root, self._img_prefix, entry['file_name']) + det = det_model.detect_one_img(abs_path) + if det: + dets += det + pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) + json.dump(dets, open(det_file, 'w')) + + @property + def joint_pairs(self): + """Joint pairs which defines the pairs of joint to be swapped + when the image is flipped horizontally.""" + return [] \ No newline at end of file diff --git a/alphapose/utils/metrics.py b/alphapose/utils/metrics.py index 9affe087..5109d4a1 100644 --- a/alphapose/utils/metrics.py +++ b/alphapose/utils/metrics.py @@ -1,6 +1,6 @@ # ----------------------------------------------------- # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
-# Written by Jiefeng Li (jeff.lee.sjtu@gmail.com), Haoyi Zhu +# Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) # ----------------------------------------------------- import os @@ -11,10 +11,6 @@ import torch.nn.functional as F from .transforms import get_max_pred_batch, _integral_tensor -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - - class DataLogger(object): """Average data logger.""" def __init__(self): @@ -66,7 +62,7 @@ def mask_cross_entropy(pred, target): pred, target, reduction='mean')[None] -def evaluate_mAP(res_file, ann_type='bbox', ann_file='./data/coco/annotations/person_keypoints_val2017.json', silence=True): +def evaluate_mAP(res_file, ann_type='bbox', ann_file='./data/coco/annotations/person_keypoints_val2017.json', silence=True, halpe=False): """Evaluate mAP result for coco dataset. Parameters @@ -92,6 +88,13 @@ def write(self, arg): oldstdout = sys.stdout sys.stdout = nullwrite # disable output + if halpe: + from halpecocotools.coco import COCO + from halpecocotools.cocoeval import COCOeval + else: + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + cocoGt = COCO(ann_file) cocoDt = cocoGt.loadRes(res_file) @@ -100,13 +103,10 @@ def write(self, arg): cocoEval.accumulate() cocoEval.summarize() - if silence: - sys.stdout = oldstdout # enable output - if isinstance(cocoEval.stats[0], dict): stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] - parts = ['body', 'face', 'hand', 'fullbody'] + parts = ['body', 'foot', 'face', 'hand', 'fullbody'] info = {} for i, part in enumerate(parts): diff --git a/alphapose/utils/pPose_nms.py b/alphapose/utils/pPose_nms.py index 358d6ec4..6acbccb4 100644 --- a/alphapose/utils/pPose_nms.py +++ b/alphapose/utils/pPose_nms.py @@ -19,7 +19,13 @@ alpha = 0.1 vis_thr = 0.2 oks_thr = 0.9 -#pool = ThreadPool(4) + +face_factor = 1.9 +hand_factor = 0.55 +hand_weight_score = 0.1 +face_weight_score = 1.0 +hand_weight_dist = 1.5 +face_weight_dist = 1.0 def oks_pose_nms(data, soft=False): @@ -152,14 +158,14 @@ def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None): list: The oks ious. 
""" if sigmas is None: - if len(g) == 408: # 136keypoints + if len(g) == 408: # 136 keypoints for Halpe-FullBody dataset sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89, .8,.8,.8,.89, .89, .89, .89, .89, .89, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25, .25])/10.0 - elif len(g) == 399: + elif len(g) == 399: # 133 keypoints for COCO WholeBody dataset sigmas = np.array([.026, .025, .025, .035, .035, .079, .079, .072, .072, .062, .062, 0.107, 0.107, .087, .087, .089, .089, 0.068, 0.066, 0.066, 0.092, 0.094, 0.094, 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, @@ -218,8 +224,22 @@ def _rescore(overlap, scores, thr, type='gaussian'): return scores +def pose_nms(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0, use_heatmap_loss=True): + if pose_preds.size()[1] == 136 or pose_preds.size()[1] == 133: + if not use_heatmap_loss: + global delta1, mu, delta2, gamma, scoreThreds, matchThreds, alpha + delta1 = 1.0 + mu = 1.65 + delta2 = 8.0 + gamma = 3.6 + scoreThreds = 0.01 + matchThreds = 3.0 + alpha = 0.15 + return pose_nms_fullbody(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres) + else: + return pose_nms_body(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres) -def pose_nms(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0): +def pose_nms_body(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0): ''' Parametric Pose NMS algorithm bboxes: bbox locations list (n, 4) @@ -271,7 +291,7 @@ def pose_nms(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0 # Delete humans who have more than matchThreds keypoints overlap and high similarity delete_ids = torch.from_numpy(np.arange(human_scores[tensor_mask].shape[0]))[((simi > gamma) | (num_match_keypoints >= matchThreds))] - + if delete_ids.shape[0] == 0: delete_ids = pick_id @@ -280,7 +300,6 @@ def pose_nms(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0 newmask[delete_ids] = False mask[mask] = newmask - assert len(merge_ids) == len(pick) preds_pick = ori_pose_preds[pick] scores_pick = ori_pose_scores[pick] @@ -324,7 +343,109 @@ def pose_nms(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0 res_pose_scores.append(merge_score) res_pick_ids.append(pick[j]) - + return res_bboxes, res_bbox_scores, res_bbox_ids, res_pose_preds, res_pose_scores, res_pick_ids + +def pose_nms_fullbody(bboxes, bbox_scores, bbox_ids, pose_preds, pose_scores, areaThres=0): + ''' + Parametric Pose NMS algorithm + bboxes: bbox locations list (n, 4) + bbox_scores: bbox scores list (n, 1) + bbox_ids: bbox tracking ids list (n, 1) + pose_preds: pose locations list (n, kp_num, 2) + pose_scores: pose scores list (n, kp_num, 1) + ''' + #global ori_pose_preds, ori_pose_scores, ref_dists + + pose_scores[pose_scores == 0] = 1e-5 + kp_nums = pose_preds.size()[1] + res_bboxes, res_bbox_scores, res_bbox_ids, res_pose_preds, res_pose_scores, res_pick_ids = 
[],[],[],[],[],[] + + ori_bboxes = bboxes.clone() + ori_bbox_scores = bbox_scores.clone() + ori_bbox_ids = bbox_ids.clone() + ori_pose_preds = pose_preds.clone() + ori_pose_scores = pose_scores.clone() + + xmax = bboxes[:, 2] + xmin = bboxes[:, 0] + ymax = bboxes[:, 3] + ymin = bboxes[:, 1] + + widths = xmax - xmin + heights = ymax - ymin + ref_dists = alpha * np.maximum(widths, heights) + + nsamples = bboxes.shape[0] + human_scores = pose_scores[:, :, :].mean(dim=1) + + human_ids = np.arange(nsamples) + mask = np.ones(len(human_ids)).astype(bool) + + # Do pPose-NMS + pick = [] + merge_ids = [] + while(mask.any()): + tensor_mask = torch.Tensor(mask)==True + # Pick the one with highest score + pick_id = torch.argmax(human_scores[tensor_mask]) + pick.append(human_ids[mask][pick_id]) + + # Get numbers of match keypoints by calling PCK_match + ref_dist = ref_dists[human_ids[mask][pick_id]] + simi = get_parametric_distance(pick_id, pose_preds[:, :, :][tensor_mask], pose_scores[:, :, :][tensor_mask], ref_dist, use_dist_mask=True) + num_match_keypoints = PCK_match_fullbody(pose_preds[:, :, :][tensor_mask][pick_id], pose_scores[:, :, :][tensor_mask][pick_id], pose_preds[:, :, :][tensor_mask], ref_dist) + + delete_ids = torch.from_numpy(np.arange(human_scores[tensor_mask].shape[0]))[((((simi > gamma) | (num_match_keypoints >= matchThreds))))] + + if delete_ids.shape[0] == 0: + delete_ids = pick_id + + merge_ids.append(human_ids[mask][delete_ids]) + newmask = mask[mask] + newmask[delete_ids] = False + mask[mask] = newmask + + assert len(merge_ids) == len(pick) + preds_pick = ori_pose_preds[pick] + scores_pick = ori_pose_scores[pick] + bbox_scores_pick = ori_bbox_scores[pick] + bboxes_pick = ori_bboxes[pick] + bbox_ids_pick = ori_bbox_ids[pick] + + for j in range(len(pick)): + ids = np.arange(kp_nums) + max_score = torch.max(scores_pick[j, ids, 0]) + + if (max_score < scoreThreds): + continue + + # Merge poses + merge_id = merge_ids[j] + merge_pose, merge_score = p_merge_fast( + preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]]) + + max_score = torch.max(merge_score[ids]) + + if (max_score < scoreThreds): + continue + + xmax = max(merge_pose[ids, 0]) + xmin = min(merge_pose[ids, 0]) + ymax = max(merge_pose[ids, 1]) + ymin = min(merge_pose[ids, 1]) + bbox = bboxes_pick[j].cpu().tolist() + bbox_score = bbox_scores_pick[j].cpu() + + if (1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres): + continue + + res_bboxes.append(bbox) + res_bbox_scores.append(bbox_score) + res_bbox_ids.append(ori_bbox_ids[merge_id].tolist()) + res_pose_preds.append(merge_pose) + res_pose_scores.append(merge_score) + res_pick_ids.append(pick[j]) + return res_bboxes, res_bbox_scores, res_bbox_ids, res_pose_preds, res_pose_scores, res_pick_ids @@ -430,6 +551,7 @@ def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist): ref_dist = min(ref_dist, 15) mask = (dist <= ref_dist) + final_pose = torch.zeros(kp_num, 2) final_score = torch.zeros(kp_num) @@ -448,7 +570,7 @@ def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist): return final_pose, final_score -def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): +def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist, use_dist_mask=False): pick_preds = all_preds[i] pred_scores = keypoint_scores[i] dist = torch.sqrt(torch.sum( @@ -458,6 +580,11 @@ def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): mask = (dist <= 1) kp_nums = all_preds.size()[1] + + if use_dist_mask: + dist_mask = 
(keypoint_scores.reshape((-1, kp_nums)) < scoreThreds) + mask = mask * dist_mask + # Define a keypoints distance score_dists = torch.zeros(all_preds.shape[0], kp_nums) keypoint_scores.squeeze_() @@ -471,7 +598,14 @@ def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) * torch.tanh(keypoint_scores[mask] / delta1) point_dist = torch.exp((-1) * dist / delta2) - final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1) + if use_dist_mask: + point_dist[:, -110:-42] = torch.exp((-1) * dist[:, -110:-42] / (delta2 * face_factor)) + point_dist[:, -42:] = torch.exp((-1) * dist[:, -42:] / (delta2 * hand_factor)) + point_dist[dist_mask] = 0 + final_dist = torch.mean(score_dists[:, :-110], dim=1) + torch.mean(score_dists[:, -110:-42], dim=1) * face_weight_score + torch.mean(score_dists[:, -42:], dim=1) * hand_weight_score\ + + mu * (torch.mean(point_dist[:, :-110], dim=1) + torch.mean(point_dist[:, -110:-42], dim=1) * face_weight_dist + torch.mean(point_dist[:, -42:], dim=1) * hand_weight_dist) + else: + final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1) return final_dist @@ -490,7 +624,39 @@ def PCK_match(pick_pred, all_preds, ref_dist): return num_match_keypoints -def write_json(all_results, outputpath, form=None, for_eval=False): +def PCK_match_fullbody(pick_pred, pred_score, all_preds, ref_dist): + kp_nums = pred_score.shape[0] + + mask = (pred_score.reshape(1, kp_nums, 1).repeat(all_preds.shape[0], 1,2) > scoreThreds / 2).float() + if mask.sum() < 2: + return torch.zeros(all_preds.shape[0]) + + dist = torch.sqrt(torch.sum( + torch.pow(pick_pred[np.newaxis, :] - all_preds, 2), + dim=2 + )) + + ref_dist = min(ref_dist, 7) + num_match_keypoints_body = torch.sum( + dist[:,:26] / ref_dist <= 1, + dim=1 + ) + + num_match_keypoints_face = torch.sum( + dist[:,26:94] / ref_dist <= face_factor, + dim=1 + ) + + num_match_keypoints_hand = torch.sum( + dist[:,94:] / ref_dist <= hand_factor, + dim=1 + ) + + num_match_keypoints = (num_match_keypoints_body + num_match_keypoints_face + num_match_keypoints_hand) / mask.sum() / 2 * kp_nums + return num_match_keypoints + + +def write_json(all_results, outputpath, form=None, for_eval=False, outputfile='alphapose-results.json'): ''' all_result: result dict of predictions outputpath: output directory @@ -557,7 +723,7 @@ def write_json(all_results, outputpath, form=None, for_eval=False): json_results.append(result) if form == 'cmu': # the form of CMU-Pose - with open(os.path.join(outputpath,'alphapose-results.json'), 'w') as json_file: + with open(os.path.join(outputpath, outputfile), 'w') as json_file: json_file.write(json.dumps(json_results_cmu)) if not os.path.exists(os.path.join(outputpath,'sep-json')): os.mkdir(os.path.join(outputpath,'sep-json')) @@ -565,7 +731,7 @@ def write_json(all_results, outputpath, form=None, for_eval=False): with open(os.path.join(outputpath,'sep-json',name.split('.')[0]+'.json'),'w') as json_file: json_file.write(json.dumps(json_results_cmu[name])) elif form == 'open': # the form of OpenPose - with open(os.path.join(outputpath,'alphapose-results.json'), 'w') as json_file: + with open(os.path.join(outputpath, outputfile), 'w') as json_file: json_file.write(json.dumps(json_results_cmu)) if not os.path.exists(os.path.join(outputpath,'sep-json')): os.mkdir(os.path.join(outputpath,'sep-json')) @@ -573,6 +739,48 @@ def write_json(all_results, outputpath, form=None, for_eval=False): with 
open(os.path.join(outputpath,'sep-json',name.split('.')[0]+'.json'),'w') as json_file: json_file.write(json.dumps(json_results_cmu[name])) else: - with open(os.path.join(outputpath,'alphapose-results.json'), 'w') as json_file: + with open(os.path.join(outputpath, outputfile), 'w') as json_file: json_file.write(json.dumps(json_results)) + +def ppose_nms_validate_preprocess(_res): + res = {} + for data in _res: + if data['image_id'] not in res.keys(): + res[data['image_id']] = [] + res[data['image_id']].append(data) + + _tmp_data = {} + for key in res.keys(): + pose_coords = [] + pose_scores = [] + bboxes = [] + scores = [] + ids = [] + i = 0 + + cur = res[key] + for pose in cur: + bboxes.append([pose['bbox'][0], pose['bbox'][1], pose['bbox'][0]+pose['bbox'][2], pose['bbox'][1]+pose['bbox'][3]]) + + kpts = np.array(pose['keypoints'], dtype=np.float32).reshape((-1, 3)) + coords = kpts[:, 0:2] + p_scores = kpts[:, 2] + s = pose['score'] - np.mean(p_scores) - 1.25 * np.max(p_scores) + scores.append(s) + + pose_coords.append(torch.from_numpy(coords).unsqueeze(0)) + pose_scores.append(torch.from_numpy(p_scores).unsqueeze(0)) + + ids.append(i) + i += 1 + preds_img = torch.cat(pose_coords) + preds_scores = torch.cat(pose_scores)[:, :, None] + boxes = torch.from_numpy(np.array(bboxes, dtype=np.float32)) + scores = torch.from_numpy(np.array(scores, dtype=np.float32).reshape(-1, 1)) + ids = torch.from_numpy(np.array(ids, dtype=np.float32).reshape(-1, 1)) + + _tmp_data[key] = (boxes, scores, ids, preds_img, preds_scores) + + + return _tmp_data \ No newline at end of file diff --git a/alphapose/utils/presets/simple_transform.py b/alphapose/utils/presets/simple_transform.py index ce72dd43..e0260414 100644 --- a/alphapose/utils/presets/simple_transform.py +++ b/alphapose/utils/presets/simple_transform.py @@ -162,7 +162,7 @@ def _target_generator(self, joints_3d, num_joints): return target, np.expand_dims(target_weight, -1) - def _integral_target_generator(self, joints_3d, num_joints, patch_height, patch_width, source=None): + def _integral_target_generator(self, joints_3d, num_joints, patch_height, patch_width): target_weight = np.ones((num_joints, 2), dtype=np.float32) target_weight[:, 0] = joints_3d[:, 0, 1] target_weight[:, 1] = joints_3d[:, 0, 1] @@ -170,13 +170,8 @@ def _integral_target_generator(self, joints_3d, num_joints, patch_height, patch_ target_weight[:26, :] = target_weight[:26, :] * 2 elif num_joints == 133: target_weight[:23, :] = target_weight[:23, :] * 2 - - if source == 'frei' or source == 'partX' or source == 'OneHand' or source == 'hand_labels_synth' \ - or source == 'hand143_panopticdb' or source == 'RHD_published_v2' or source == 'interhand': - if target_weight[-21:,:].sum() > 0 and target_weight[-42:-21].sum() == 0: - target_weight[-42:-21] += 1 - elif target_weight[-21:,:].sum() == 0 and target_weight[-42:-21].sum() > 0: - target_weight[-21:,:] += 1 + elif num_joints == 68: + target_weight[:26, :] = target_weight[:26, :] * 2 target = np.zeros((num_joints, 2), dtype=np.float32) target[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5 @@ -186,7 +181,7 @@ def _integral_target_generator(self, joints_3d, num_joints, patch_height, patch_ target_weight = target_weight.reshape((-1)) return target, target_weight - def __call__(self, src, label, source=None): + def __call__(self, src, label): bbox = list(label['bbox']) gt_joints = label['joints_3d'] @@ -224,10 +219,7 @@ def __call__(self, src, label, source=None): # rotation if self._train: - if source == 'frei' or source == 'partX' or source 
== 'OneHand' or source == 'interhand': - rf = 180 - else: - rf = self._rot + rf = self._rot r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 else: r = 0 @@ -255,7 +247,14 @@ def __call__(self, src, label, source=None): if self._loss_type == 'MSELoss': target, target_weight = self._target_generator(joints, self.num_joints) elif 'JointRegression' in self._loss_type: - target, target_weight = self._integral_target_generator(joints, self.num_joints, inp_h, inp_w, source) + target, target_weight = self._integral_target_generator(joints, self.num_joints, inp_h, inp_w) + elif self._loss_type == 'Combined': + if self.num_joints == 68: + hand_face_num = 42 + else: + hand_face_num = 110 + target_mse, target_weight_mse = self._target_generator(joints[:-hand_face_num,:,:], self.num_joints-hand_face_num) + target_inter, target_weight_inter = self._integral_target_generator(joints[-hand_face_num:,:,:], hand_face_num, inp_h, inp_w) bbox = _center_scale_to_box(center, scale) @@ -263,8 +262,11 @@ def __call__(self, src, label, source=None): img[0].add_(-0.406) img[1].add_(-0.457) img[2].add_(-0.480) - - return img, torch.from_numpy(target), torch.from_numpy(target_weight), torch.Tensor(bbox) + + if self._loss_type == 'Combined': + return img, [torch.from_numpy(target_mse), torch.from_numpy(target_inter)], [torch.from_numpy(target_weight_mse), torch.from_numpy(target_weight_inter)], torch.Tensor(bbox) + else: + return img, torch.from_numpy(target), torch.from_numpy(target_weight), torch.Tensor(bbox) def half_body_transform(self, joints, joints_vis): upper_joints = [] diff --git a/alphapose/utils/transforms.py b/alphapose/utils/transforms.py index f3029008..6052e2d0 100644 --- a/alphapose/utils/transforms.py +++ b/alphapose/utils/transforms.py @@ -806,5 +806,7 @@ def get_func_heatmap_to_coord(cfg): return heatmap_to_coord_simple elif cfg.LOSS.TYPE == 'L1JointRegression': return heatmap_to_coord_simple_regress + elif cfg.LOSS.TYPE == 'Combined': + return [heatmap_to_coord_simple, heatmap_to_coord_simple_regress] else: - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/alphapose/utils/vis.py b/alphapose/utils/vis.py index a97204bb..3e720d1c 100644 --- a/alphapose/utils/vis.py +++ b/alphapose/utils/vis.py @@ -32,7 +32,7 @@ def get_color_fast(idx): return color -def vis_frame_fast(frame, im_res, opt, format='coco'): +def vis_frame_fast(frame, im_res, opt, vis_thres, format='coco'): ''' frame: frame image im_res: im_res of predictions @@ -42,7 +42,7 @@ def vis_frame_fast(frame, im_res, opt, format='coco'): ''' kp_num = 17 if len(im_res['result']) > 0: - kp_num = len(im_res['result'][0]['keypoints']) + kp_num = len(im_res['result'][0]['keypoints']) if kp_num == 17: if format == 'coco': l_pair = [ @@ -90,6 +90,56 @@ def vis_frame_fast(frame, im_res, opt, format='coco'): (77, 255, 255), (0, 255, 255), (77, 204, 255), # head, neck, shoulder (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), + (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), + (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), + (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), + (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 255), (255, 156, 127), (255, 156, 127)] + elif kp_num == 133: + l_pair = [ + (0, 1), (0, 2), (1, 3), (2, 4), # Head + (5, 7), (7, 9), (6, 8), (8, 10),# Body + 
(11, 13), (12, 14), (13, 15), (14, 16), + (18, 19), (21, 22), (20, 22), (17, 19), (15, 19), (16, 22), + (23, 24), (24, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), (30, 31), (31, 32), (32, 33), (33, 34), (34, 35), + (35, 36), (36, 37), (37, 38), (38, 39), (40, 41), (41, 42), (42, 43), (43, 44), (45, 46), (46, 47), (47, 48), (48, 49), + (50, 51), (51, 52), (52, 53), (54, 55), (55, 56), (56, 57), (57, 58), (59, 60), (60, 61), (61, 62), (62, 63), (63, 64), + (65, 66), (66, 67), (67, 68), (68, 69), (69, 70), (71, 72), (72, 73), (73, 74), (74, 75), (75, 76), (76, 77), (77, 78), + (78, 79), (79, 80), (80, 81), (81, 82), (82, 83), (83, 84), (84, 85), (85, 86), (86, 87), (87, 88), (88, 89), (89, 90), + (91, 92), (92, 93), (93, 94), (94, 95), (91, 96), (96, 97), (97, 98), (98, 99), (91, 100), (100, 101), (101, 102), + (102, 103), (91, 104), (104, 105), (105, 106), (106, 107), (91, 108), (108, 109), (109, 110), (110, 111), (112, 113), + (113, 114), (114, 115), (115, 116), (112, 117), (117, 118), (118, 119), (119, 120), (112, 121), (121, 122), (122, 123), + (123, 124), (112, 125), (125, 126), (126, 127), (127, 128), (112, 129), (129, 130), (130, 131), (131, 132) + ] + p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar + (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist + (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck + (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), + (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), + (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), + (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), + (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 255)] + elif kp_num == 68: + l_pair = [ + (0, 1), (0, 2), (1, 3), (2, 4), # Head + (5, 18), (6, 18), (5, 7), (7, 9), (6, 8), (8, 10),# Body + (17, 18), (18, 19), (19, 11), (19, 12), + (11, 13), (12, 14), (13, 15), (14, 16), + (20, 24), (21, 25), (23, 25), (22, 24), (15, 24), (16, 25),# Foot + (26, 27), (27, 28), (28, 29), (29, 30), (26, 31), (31, 32), (32, 33), (33, 34), + (26, 35), (35, 36), (36, 37), (37, 38), (26, 39), (39, 40), (40, 41), (41, 42), + (26, 43), (43, 44), (44, 45), (45, 46), (47, 48), (48, 49), (49, 50), (50, 51), + (47, 52), (52, 53), (53, 54), (54, 55), (47, 56), (56, 57), (57, 58), (58, 59), + (47, 60), (60, 61), (61, 62), (62, 63), (47, 64), (64, 65), (65, 66), (66, 67) + ] + p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar + (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist + (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck + (77, 255, 255), (0, 255, 255), (77, 204, 255), # head, neck, shoulder + (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 191, 255), (204, 77, 255), (77, 222, 
255), (255, 156, 127), @@ -114,6 +164,26 @@ def vis_frame_fast(frame, im_res, opt, format='coco'): (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 255), (255, 156, 127), (255, 156, 127)] + elif kp_num == 21: + l_pair = [ + (0, 1), (1, 2), (2, 3), (3, 4), (0, 5), (5, 6), (6, 7), (7, 8), + (0, 9), (9, 10), (10, 11), (11, 12), (0, 13), (13, 14), (14, 15), + (15, 16), (0, 17), (17, 18), (18, 19), (19, 20), (21, 22), (22, 23), + (23, 24), (24, 25), (21, 26), (26, 27), (27, 28), (28, 29), (21, 30), + (30, 31), (31, 32), (32, 33), (21, 34), (34, 35), (35, 36), (36, 37), + (21, 38), (38, 39), (39, 40), (40, 41) + ] + p_color = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255) ] + + line_color = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255) ] else: raise NotImplementedError # im_name = os.path.basename(im_res['imgname']) @@ -149,9 +219,8 @@ def vis_frame_fast(frame, im_res, opt, format='coco'): if opt.tracking: cv2.putText(img, str(human['idx']), (int(bbox[0]), int((bbox[2] + 26))), DEFAULT_FONT, 1, BLACK, 2) # Draw keypoints - vis_thres = 0.05 if kp_num == 136 else 0.4 for n in range(kp_scores.shape[0]): - if kp_scores[n] <= vis_thres: + if kp_scores[n] <= vis_thres[n]: continue cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) part_line[n] = (cor_x, cor_y) @@ -178,7 +247,7 @@ def vis_frame_fast(frame, im_res, opt, format='coco'): return img -def vis_frame(frame, im_res, opt, format='coco'): +def vis_frame(frame, im_res, opt, vis_thres, format='coco'): ''' frame: frame image im_res: im_res of predictions @@ -188,7 +257,7 @@ def vis_frame(frame, im_res, opt, format='coco'): ''' kp_num = 17 if len(im_res['result']) > 0: - kp_num = len(im_res['result'][0]['keypoints']) + kp_num = len(im_res['result'][0]['keypoints']) if kp_num == 17: if format == 'coco': @@ -239,6 +308,56 @@ def vis_frame(frame, im_res, opt, format='coco'): (77, 255, 255), (0, 255, 255), (77, 204, 255), # head, neck, shoulder (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), + (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), + (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), + (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), + (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 255), (255, 156, 127), (255, 156, 127)] + elif kp_num == 133: + l_pair = [ + (0, 1), (0, 2), (1, 3), (2, 4), # Head + (5, 7), (7, 9), (6, 8), (8, 10),# Body + (11, 13), (12, 14), (13, 15), (14, 16), + (18, 19), (21, 22), (20, 22), (17, 19), (15, 19), (16, 22), + (23, 24), (24, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), (30, 31), (31, 32), (32, 33), (33, 34), (34, 35), + (35, 36), (36, 37), (37, 38), (38, 39), 
(40, 41), (41, 42), (42, 43), (43, 44), (45, 46), (46, 47), (47, 48), (48, 49), + (50, 51), (51, 52), (52, 53), (54, 55), (55, 56), (56, 57), (57, 58), (59, 60), (60, 61), (61, 62), (62, 63), (63, 64), + (65, 66), (66, 67), (67, 68), (68, 69), (69, 70), (71, 72), (72, 73), (73, 74), (74, 75), (75, 76), (76, 77), (77, 78), + (78, 79), (79, 80), (80, 81), (81, 82), (82, 83), (83, 84), (84, 85), (85, 86), (86, 87), (87, 88), (88, 89), (89, 90), + (91, 92), (92, 93), (93, 94), (94, 95), (91, 96), (96, 97), (97, 98), (98, 99), (91, 100), (100, 101), (101, 102), + (102, 103), (91, 104), (104, 105), (105, 106), (106, 107), (91, 108), (108, 109), (109, 110), (110, 111), (112, 113), + (113, 114), (114, 115), (115, 116), (112, 117), (117, 118), (118, 119), (119, 120), (112, 121), (121, 122), (122, 123), + (123, 124), (112, 125), (125, 126), (126, 127), (127, 128), (112, 129), (129, 130), (130, 131), (131, 132) + ] + p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar + (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist + (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck + (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), + (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), + (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), + (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), + (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 255)] + elif kp_num == 68: + l_pair = [ + (0, 1), (0, 2), (1, 3), (2, 4), # Head + (5, 18), (6, 18), (5, 7), (7, 9), (6, 8), (8, 10),# Body + (17, 18), (18, 19), (19, 11), (19, 12), + (11, 13), (12, 14), (13, 15), (14, 16), + (20, 24), (21, 25), (23, 25), (22, 24), (15, 24), (16, 25),# Foot + (26, 27), (27, 28), (28, 29), (29, 30), (26, 31), (31, 32), (32, 33), (33, 34), + (26, 35), (35, 36), (36, 37), (37, 38), (26, 39), (39, 40), (40, 41), (41, 42), + (26, 43), (43, 44), (44, 45), (45, 46), (47, 48), (48, 49), (49, 50), (50, 51), + (47, 52), (52, 53), (53, 54), (54, 55), (47, 56), (56, 57), (57, 58), (58, 59), + (47, 60), (60, 61), (61, 62), (62, 63), (47, 64), (64, 65), (65, 66), (66, 67) + ] + p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar + (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist + (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck + (77, 255, 255), (0, 255, 255), (77, 204, 255), # head, neck, shoulder + (0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), (77, 255, 255)] # foot + line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (0, 255, 102), (77, 255, 222), (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), @@ -263,6 +382,26 @@ def vis_frame(frame, im_res, opt, format='coco'): (77, 191, 255), (204, 77, 255), (77, 222, 255), (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36), (0, 77, 255), (0, 77, 255), (0, 77, 255), (0, 77, 
255), (255, 156, 127), (255, 156, 127)] + elif kp_num == 21: + l_pair = [ + (0, 1), (1, 2), (2, 3), (3, 4), (0, 5), (5, 6), (6, 7), (7, 8), + (0, 9), (9, 10), (10, 11), (11, 12), (0, 13), (13, 14), (14, 15), + (15, 16), (0, 17), (17, 18), (18, 19), (19, 20), (21, 22), (22, 23), + (23, 24), (24, 25), (21, 26), (26, 27), (27, 28), (28, 29), (21, 30), + (30, 31), (31, 32), (32, 33), (21, 34), (34, 35), (35, 36), (36, 37), + (21, 38), (38, 39), (39, 40), (40, 41) + ] + p_color = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255) ] + + line_color = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), + (255, 255, 255) ] else: raise NotImplementedError # im_name = os.path.basename(im_res['imgname']) @@ -299,9 +438,8 @@ def vis_frame(frame, im_res, opt, format='coco'): cv2.putText(img, str(human['idx']), (int(bbox[0]), int((bbox[2] + 26))), DEFAULT_FONT, 1, BLACK, 2) # Draw keypoints - vis_thres = 0.05 if kp_num == 136 else 0.4 for n in range(kp_scores.shape[0]): - if kp_scores[n] <= vis_thres: + if kp_scores[n] <= vis_thres[n]: continue cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) part_line[n] = (int(cor_x), int(cor_y)) diff --git a/alphapose/utils/writer.py b/alphapose/utils/writer.py index 6c75bbc3..ec420c80 100644 --- a/alphapose/utils/writer.py +++ b/alphapose/utils/writer.py @@ -47,6 +47,22 @@ def __init__(self, cfg, opt, save_video=False, from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper self.pose_flow_wrapper = PoseFlowWrapper(save_path=os.path.join(opt.outputpath, 'poseflow')) + if self.opt.save_img or self.save_video or self.opt.vis: + loss_type = self.cfg.DATA_PRESET.get('LOSS_TYPE', 'MSELoss') + num_joints = self.cfg.DATA_PRESET.NUM_JOINTS + if loss_type == 'MSELoss': + self.vis_thres = [0.4] * num_joints + elif 'JointRegression' in loss_type: + self.vis_thres = [0.05] * num_joints + elif loss_type == 'Combined': + if num_joints == 68: + hand_face_num = 42 + else: + hand_face_num = 110 + self.vis_thres = [0.4] * (num_joints - hand_face_num) + [0.05] * hand_face_num + + self.use_heatmap_loss = (self.cfg.DATA_PRESET.get('LOSS_TYPE', 'MSELoss') == 'MSELoss') + def start_worker(self, target): if self.opt.sp: p = Thread(target=target, args=()) @@ -95,24 +111,39 @@ def update(self): else: # location prediction (n, kp, 2) | score prediction (n, kp, 1) assert hm_data.dim() == 4 - #pred = hm_data.cpu().data.numpy() + face_hand_num = 110 if hm_data.size()[1] == 136: self.eval_joints = [*range(0,136)] elif hm_data.size()[1] == 26: self.eval_joints = [*range(0,26)] + elif hm_data.size()[1] == 133: + self.eval_joints = [*range(0,133)] + elif hm_data.size()[1] == 68: + face_hand_num = 42 + self.eval_joints = [*range(0,68)] + elif hm_data.size()[1] == 21: + self.eval_joints = [*range(0,21)] pose_coords = [] pose_scores = [] for i in range(hm_data.shape[0]): bbox = cropped_boxes[i].tolist() - pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, 
hm_shape=hm_size, norm_type=norm_type) + if isinstance(self.heatmap_to_coord, list): + pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[0]( + hm_data[i][self.eval_joints[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[1]( + hm_data[i][self.eval_joints[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0)) pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0)) preds_img = torch.cat(pose_coords) preds_scores = torch.cat(pose_scores) if not self.opt.pose_track: boxes, scores, ids, preds_img, preds_scores, pick_ids = \ - pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area) + pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area, use_heatmap_loss=self.use_heatmap_loss) _result = [] for k in range(len(scores)): @@ -145,7 +176,7 @@ def update(self): from alphapose.utils.vis import vis_frame_fast as vis_frame else: from alphapose.utils.vis import vis_frame - img = vis_frame(orig_img, result, self.opt) + img = vis_frame(orig_img, result, self.opt, self.vis_thres) self.write_image(img, im_name, stream=stream if self.save_video else None) def write_image(self, img, im_name, stream=None): diff --git a/alphapose/version.py b/alphapose/version.py index 6e610062..0777e6be 100644 --- a/alphapose/version.py +++ b/alphapose/version.py @@ -1,5 +1,5 @@ # GENERATED VERSION FILE -# TIME: Tue Aug 18 16:28:27 2020 +# TIME: Tue Aug 24 19:39:04 2021 -__version__ = '0.3.0+cbc364f' +__version__ = '0.3.0+5c44745' short_version = '0.3.0' diff --git a/configs/halpe_136/hardnet/256x192_hard68_lr1e-3_1x.yaml b/configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml similarity index 56% rename from configs/halpe_136/hardnet/256x192_hard68_lr1e-3_1x.yaml rename to configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml index a97c241f..6130afe2 100644 --- a/configs/halpe_136/hardnet/256x192_hard68_lr1e-3_1x.yaml +++ b/configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml @@ -1,9 +1,9 @@ DATASET: TRAIN: - TYPE: 'Halpe_136' + TYPE: 'coco_wholebody' ROOT: './data/coco/' IMG_PREFIX: 'train2017' - ANN: 'annotations/person_keypoints_train2017.json' + ANN: 'annotations/coco_wholebody_train_v1.0.json' AUG: FLIP: true ROT_FACTOR: 40 @@ -11,20 +11,21 @@ DATASET: NUM_JOINTS_HALF_BODY: 8 PROB_HALF_BODY: -1 VAL: - TYPE: 'Halpe_136' + TYPE: 'coco_wholebody' ROOT: './data/coco/' IMG_PREFIX: 'val2017' - ANN: 'annotations/person_keypoints_val2017.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' TEST: - TYPE: 'Halpe_136_det' + TYPE: 'coco_wholebody_det' ROOT: './data/coco/' IMG_PREFIX: 'val2017' DET_FILE: './exp/json/test_det_yolo.json' - ANN: 'annotations/person_keypoints_val2017.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' DATA_PRESET: TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' SIGMA: 2 - NUM_JOINTS: 136 + NUM_JOINTS: 133 IMAGE_SIZE: - 256 - 192 @@ -32,16 +33,27 @@ DATA_PRESET: - 64 - 48 MODEL: - TYPE: 'HarDNetPose' - INIT_WEIGHTS: '' + TYPE: 'FastPose' + BACKBONE: 'se-resnet' PRETRAINED: '' TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 152 
+ CONV_DIM: 256 FINAL_CONV_KERNEL: 1 - NUM_LAYERS: 68 - DOWN_RATIO: 4 - TRT: False + STAGE1: + NUM_CONV: 4 + STAGE2: + NUM_CONV: 2 + STAGE3: + NUM_CONV: 1 LOSS: - TYPE: 'MSELoss' + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False DETECTOR: NAME: 'yolo' CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' @@ -52,14 +64,14 @@ TRAIN: WORLD_SIZE: 4 BATCH_SIZE: 32 BEGIN_EPOCH: 0 - END_EPOCH: 200 + END_EPOCH: 270 OPTIMIZER: 'adam' LR: 0.001 LR_FACTOR: 0.1 LR_STEP: - - 90 - - 120 - DPG_MILESTONE: 140 + - 170 + - 200 + DPG_MILESTONE: 210 DPG_STEP: - - 160 - - 190 + - 230 + - 250 diff --git a/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml new file mode 100644 index 00000000..c48a413e --- /dev/null +++ b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml @@ -0,0 +1,75 @@ +DATASET: + TRAIN: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'train2017' + ANN: 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + ANN: 'annotations/coco_wholebody_val_v1.0.json' + TEST: + TYPE: 'coco_wholebody_det' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'Combined' + SIGMA: 2 + NUM_JOINTS: 133 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 +LOSS: + TYPE: 'Combined' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False + LOSS_1: + TYPE: 'MSELoss' + LOSS_2: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 32 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml new file mode 100644 index 00000000..aac51544 --- /dev/null +++ b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml @@ -0,0 +1,84 @@ +DATASET: + TRAIN: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'train2017' + ANN: 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + ANN: 'annotations/coco_wholebody_val_v1.0.json' + TEST: + TYPE: 'coco_wholebody_det' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'Combined' + SIGMA: 2 + NUM_JOINTS: 133 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + CONV_DIM: 256 + NUM_LAYERS: 50 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + 
TYPE: 'Combined' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False + LOSS_1: + TYPE: 'MSELoss' + LOSS_2: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 32 + BEGIN_EPOCH: 0 + END_EPOCH: 320 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 190 + - 220 + DPG_MILESTONE: 230 + DPG_STEP: + - 260 + - 280 diff --git a/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml new file mode 100644 index 00000000..54bebabb --- /dev/null +++ b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml @@ -0,0 +1,78 @@ +DATASET: + TRAIN: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'train2017' + ANN: 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + ANN: 'annotations/coco_wholebody_val_v1.0.json' + TEST: + TYPE: 'coco_wholebody_det' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 133 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml new file mode 100644 index 00000000..68b150cf --- /dev/null +++ b/configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml @@ -0,0 +1,68 @@ +DATASET: + TRAIN: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'train2017' + ANN: 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'coco_wholebody' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + ANN: 'annotations/coco_wholebody_val_v1.0.json' + TEST: + TYPE: 'coco_wholebody_det' + ROOT: './data/coco/' + IMG_PREFIX: 'val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/coco_wholebody_val_v1.0.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 133 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + 
WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 5 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml b/configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml new file mode 100644 index 00000000..0bb1e1a2 --- /dev/null +++ b/configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml @@ -0,0 +1,77 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1 + VAL: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_136_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 136 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + BACKBONE: 'se-resnet' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 152 + CONV_DIM: 256 + FINAL_CONV_KERNEL: 1 + STAGE1: + NUM_CONV: 4 + STAGE2: + NUM_CONV: 2 + STAGE3: + NUM_CONV: 1 +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 32 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml new file mode 100644 index 00000000..024559b7 --- /dev/null +++ b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml @@ -0,0 +1,84 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_136_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'Combined' + SIGMA: 2 + NUM_JOINTS: 136 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + CONV_DIM: 256 + NUM_LAYERS: 50 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'Combined' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False + LOSS_1: + TYPE: 'MSELoss' + LOSS_2: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 
+ BEGIN_EPOCH: 0 + END_EPOCH: 320 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 190 + - 220 + DPG_MILESTONE: 230 + DPG_STEP: + - 260 + - 280 diff --git a/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml new file mode 100644 index 00000000..88a9e7a8 --- /dev/null +++ b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml @@ -0,0 +1,78 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_136_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 136 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml index 50bfe56f..4472a87d 100644 --- a/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml +++ b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml @@ -1,9 +1,9 @@ DATASET: TRAIN: TYPE: 'Halpe_136' - ROOT: '' - IMG_PREFIX: 'train2017' - ANN: '/home/group3/hico-coco.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' AUG: FLIP: true ROT_FACTOR: 45 @@ -12,15 +12,15 @@ DATASET: PROB_HALF_BODY: 0.3 VAL: TYPE: 'Halpe_136' - ROOT: './data/coco/' - IMG_PREFIX: 'val2017' - ANN: 'coco_val_full_finetuned.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' TEST: TYPE: 'Halpe_136_det' - ROOT: './data/coco/' - IMG_PREFIX: 'val2017' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' DET_FILE: './exp/json/test_det_yolo.json' - ANN: 'coco_val_full_finetuned.json' + ANN: 'annotations/halpe_val_v1.json' DATA_PRESET: TYPE: 'simple' LOSS_TYPE: 'L1JointRegression' @@ -34,7 +34,7 @@ DATA_PRESET: - 48 MODEL: TYPE: 'FastPose' - PRETRAINED: '/home/group3/newrepo/AlphaPose/exp/hico-coco-_regression-256x192_res50_lr1e-3_2x-regression-frei.yaml/model_3.pth' + PRETRAINED: '' TRY_LOAD: '' NUM_DECONV_FILTERS: - 256 diff --git a/configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml similarity index 62% rename from configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml rename to configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml index b7a5c53a..6912ca7f 100644 --- 
a/configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml +++ b/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml @@ -1,26 +1,26 @@ DATASET: TRAIN: TYPE: 'Halpe_136' - ROOT: './data/coco/' - IMG_PREFIX: 'train2017' - ANN: 'annotations/person_keypoints_train2017.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' AUG: FLIP: true - ROT_FACTOR: 40 - SCALE_FACTOR: 0.3 + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 NUM_JOINTS_HALF_BODY: 8 - PROB_HALF_BODY: -1 + PROB_HALF_BODY: 0.3 VAL: TYPE: 'Halpe_136' - ROOT: './data/coco/' - IMG_PREFIX: 'val2017' - ANN: 'annotations/person_keypoints_val2017.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' TEST: TYPE: 'Halpe_136_det' - ROOT: './data/coco/' - IMG_PREFIX: 'val2017' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' DET_FILE: './exp/json/test_det_yolo.json' - ANN: 'annotations/person_keypoints_val2017.json' + ANN: 'annotations/halpe_val_v1.json' DATA_PRESET: TYPE: 'simple' SIGMA: 2 @@ -40,6 +40,7 @@ MODEL: - 256 - 256 NUM_LAYERS: 50 + CONV_DIM: 256 LOSS: TYPE: 'MSELoss' DETECTOR: @@ -52,14 +53,14 @@ TRAIN: WORLD_SIZE: 4 BATCH_SIZE: 32 BEGIN_EPOCH: 0 - END_EPOCH: 200 + END_EPOCH: 270 OPTIMIZER: 'adam' LR: 0.001 LR_FACTOR: 0.1 LR_STEP: - - 90 - - 120 - DPG_MILESTONE: 140 + - 170 + - 200 + DPG_MILESTONE: 210 DPG_STEP: - - 160 - - 190 + - 230 + - 250 diff --git a/configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml b/configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml index 4f1301fb..85af0bf1 100644 --- a/configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml +++ b/configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml @@ -1,9 +1,9 @@ DATASET: TRAIN: TYPE: 'Halpe_26' - ROOT: '' - IMG_PREFIX: 'train2017' - ANN: '/home/group3/hico_and_coco_26kpts.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' AUG: FLIP: true ROT_FACTOR: 40 @@ -12,15 +12,15 @@ DATASET: PROB_HALF_BODY: -1 VAL: TYPE: 'Halpe_26' - ROOT: '' - IMG_PREFIX: 'val2017' - ANN: '/home/group3/coco_val_full_26.json' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' TEST: - TYPE: 'Halpe_26' - ROOT: '' - IMG_PREFIX: 'val2017' + TYPE: 'Halpe_26_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' DET_FILE: './exp/json/test_det_yolo.json' - ANN: '/home/group3/coco_val_full_26.json' + ANN: 'annotations/halpe_val_v1.json' DATA_PRESET: TYPE: 'simple' SIGMA: 2 @@ -34,7 +34,7 @@ DATA_PRESET: MODEL: TYPE: 'FastPose' PRETRAINED: '' - TRY_LOAD: '/home/group3/AlphaPose/exp/pami_hico_and_coco-256x192_res50_lr1e-3_1x.yaml/model_6.pth' + TRY_LOAD: '' NUM_DECONV_FILTERS: - 256 - 256 diff --git a/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml new file mode 100644 index 00000000..eeff78d2 --- /dev/null +++ b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml @@ -0,0 +1,78 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_26_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: 
+ TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 26 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-regression.yaml b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-regression.yaml new file mode 100644 index 00000000..2fc9e3ea --- /dev/null +++ b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-regression.yaml @@ -0,0 +1,69 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_26_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 26 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 5 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x.yaml b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x.yaml new file mode 100644 index 00000000..184a3c85 --- /dev/null +++ b/configs/halpe_26/resnet/256x192_res50_lr1e-3_2x.yaml @@ -0,0 +1,65 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_26' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_26_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + SIGMA: 2 + NUM_JOINTS: 26 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 +LOSS: + TYPE: 'MSELoss' +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 
'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 32 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 \ No newline at end of file diff --git a/configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml b/configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml new file mode 100644 index 00000000..2eff9ada --- /dev/null +++ b/configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml @@ -0,0 +1,84 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_68_noface' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/train2015' + ANN: 'annotations/halpe_train_v1.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_68_noface' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_68_noface_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'Combined' + SIGMA: 2 + NUM_JOINTS: 68 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + CONV_DIM: 256 + NUM_LAYERS: 50 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'Combined' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False + LOSS_1: + TYPE: 'MSELoss' + LOSS_2: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 5 + BATCH_SIZE: 52 + BEGIN_EPOCH: 0 + END_EPOCH: 120 + OPTIMIZER: 'adam' + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 10 + - 30 + DPG_MILESTONE: 40 + DPG_STEP: + - 60 + - 90 \ No newline at end of file diff --git a/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml b/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml new file mode 100644 index 00000000..76e8e5d7 --- /dev/null +++ b/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml @@ -0,0 +1,90 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_coco_wholebody_136' + ROOT: + - './data/halpe/' + - './data/coco/' + IMG_PREFIX: + - 'images/train2015' + - 'train2017' + ANN: + - 'annotations/halpe_train_v1.json' + - 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_136_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'Combined' + SIGMA: 2 + NUM_JOINTS: 136 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 + CONV_DIM: 256 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 
'Combined' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False + LOSS_1: + TYPE: 'MSELoss' + LOSS_2: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 0 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml b/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml new file mode 100644 index 00000000..a2d4e4db --- /dev/null +++ b/configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml @@ -0,0 +1,74 @@ +DATASET: + TRAIN: + TYPE: 'Halpe_coco_wholebody_136' + ROOT: + - './data/halpe/' + - './data/coco/' + IMG_PREFIX: + - 'images/train2015' + - 'train2017' + ANN: + - 'annotations/halpe_train_v1.json' + - 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + VAL: + TYPE: 'Halpe_136' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'Halpe_136_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + SIGMA: 2 + NUM_JOINTS: 136 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_LAYERS: 50 +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 4 + BATCH_SIZE: 48 + BEGIN_EPOCH: 5 + END_EPOCH: 270 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + DPG_MILESTONE: 210 + DPG_STEP: + - 230 + - 250 diff --git a/configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml b/configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml new file mode 100644 index 00000000..f7649b1f --- /dev/null +++ b/configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml @@ -0,0 +1,85 @@ +DATASET: + TRAIN: + TYPE: 'SingleHand' + ROOT: + - './data/halpe/' + - './data/coco/' + IMG_PREFIX: + - 'images/train2015' + - 'train2017' + ANN: + - 'annotations/halpe_train_v1.json' + - 'annotations/coco_wholebody_train_v1.0.json' + AUG: + FLIP: true + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + NUM_JOINTS_HALF_BODY: 0 + PROB_HALF_BODY: 0.0 + VAL: + TYPE: 'SingleHand' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + ANN: 'annotations/halpe_val_v1.json' + TEST: + TYPE: 'SingleHand_det' + ROOT: './data/halpe/' + IMG_PREFIX: 'images/val2017' + DET_FILE: './exp/json/test_det_yolo.json' + ANN: 'annotations/halpe_val_v1.json' +DATA_PRESET: + TYPE: 'simple' + LOSS_TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + SIGMA: 2 + NUM_JOINTS: 21 + IMAGE_SIZE: + - 256 + - 192 + HEATMAP_SIZE: + - 64 + - 48 +MODEL: + TYPE: 'FastPose' + PRETRAINED: '' + TRY_LOAD: '' + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + CONV_DIM: 256 + NUM_LAYERS: 50 + DCN: + MODULATED: false + DEFORM_GROUP: 1 + FALLBACK_ON_STRIDE: false + 
STAGE_WITH_DCN: + - false + - true + - true + - true +LOSS: + TYPE: 'L1JointRegression' + NORM_TYPE: 'sigmoid' + OUTPUT_3D: False +DETECTOR: + NAME: 'yolo' + CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' + WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' + NMS_THRES: 0.6 + CONFIDENCE: 0.05 +TRAIN: + WORLD_SIZE: 6 + BATCH_SIZE: 48 + BEGIN_EPOCH: 0 + END_EPOCH: 100 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 15 + - 35 + DPG_MILESTONE: 50 + DPG_STEP: + - 65 + - 80 \ No newline at end of file diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 03db56b4..7ac6a68e 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -121,3 +121,29 @@ Download and extract them under `./data`, and make them look like this: |-- 045572740.jpg |-- ... ``` + +#### Halpe-FullBody +If you want to train the model by yourself, please download the data from [Halpe-FullBody](https://github.com/Fang-Haoshu/Halpe-FullBody). Download and extract them under `./data`, and make them look like this (a small layout sanity-check sketch is given after the Halpe-26 notes below): +``` +|-- json +|-- exp +|-- alphapose +|-- configs +|-- test +|-- data +`-- |-- halpe + `-- |-- annotations + | |-- halpe_train_v1.json + | `-- halpe_val_v1.json + |-- images + `-- |-- train2015 + | |-- HICO_train2015_00000001.jpg + | |-- HICO_train2015_00000002.jpg + | |-- HICO_train2015_00000003.jpg + | |-- ... + `-- val2017 + |-- 000000000139.jpg + |-- 000000000285.jpg + |-- 000000000632.jpg + |-- ... +``` diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index a1f2842c..af7097e4 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -22,28 +22,77 @@ ## [Halpe dataset](https://github.com/Fang-Haoshu/Halpe-FullBody) (26 keypoints) -| Model | Backbone | Detector | Input Size | AP | Speed | Download | Config | Training Log | -|--------------------------|----------|----------|------------|------------|-------|-----------|--------|--------------| -|[Fast Pose](../configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml) | ResNet50 | YOLOv3 | 256x192 | 69.0 | 3.54 iter/s | [Google](https://drive.google.com/file/d/1S-ROA28de-1zvLv-hVfPFJ5tFBYOSITb/view?usp=sharing) [Baidu](https://pan.baidu.com/s/1lvzMhoYgS6o6n8lVDx3GtQ) | [cfg](../configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml) | [log]() | +| Model | Backbone | Detector | Input Size | AP | Speed | Download | Config | +|--------------------------|----------|----------|------------|------------|-------|-----------|--------| +|[Fast Pose](../configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml) | ResNet50 | YOLOv3 | 256x192 | - | 13.12 iter/s | [Google](https://drive.google.com/file/d/1S-ROA28de-1zvLv-hVfPFJ5tFBYOSITb/view?usp=sharing) [Baidu](https://pan.baidu.com/s/1lvzMhoYgS6o6n8lVDx3GtQ) | [cfg](../configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml) | -You can run with: +For example, you can run with: ``` python scripts/demo_inference.py --cfg configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml --checkpoint pretrained_models/halpe26_fast_res50_256x192.pth --indir examples/demo/ --save_img ``` #### Notes -- More models coming soon! +- This model is trained on the first 26 keypoints of the Halpe Full-body dataset (without face and hand keypoints). +- The speed is tested on COCO val2017 on a single NVIDIA GeForce RTX 3090 GPU, with `batch_size=64` in each iteration and offline yolov3 human detection results. 
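A minimal sanity check of the Halpe-FullBody layout described in the INSTALL.md hunk above, assuming the default `./data/halpe` root from the new configs; the helper below is illustrative only and not part of the AlphaPose codebase.
```
# Minimal sanity check for the ./data/halpe layout described in INSTALL.md.
# Illustrative helper only; not part of the AlphaPose codebase.
import os

def check_halpe_layout(root='./data/halpe'):
    expected = [
        'annotations/halpe_train_v1.json',
        'annotations/halpe_val_v1.json',
        'images/train2015',
        'images/val2017',
    ]
    missing = [p for p in expected if not os.path.exists(os.path.join(root, p))]
    if missing:
        raise FileNotFoundError('Missing under {}: {}'.format(root, ', '.join(missing)))
    print('Halpe-FullBody data layout looks complete.')

if __name__ == '__main__':
    check_halpe_layout()
```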
## [Halpe dataset](https://github.com/Fang-Haoshu/Halpe-FullBody) (136 keypoints) -| Model | Backbone | Detector | Input Size | AP | Speed | Download | Config | Training Log | -|--------------------------|----------|----------|------------|------------|-------|-----------|--------|--------------| -|[Fast Pose](../configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml) | ResNet50 | YOLOv3 | 256x192 | 69.0 | 3.54 iter/s | [Google](https://drive.google.com/file/d/17vnGsMDbG4rf50kyj586BVJsiAspQv5v/view?usp=sharing) [Baidu](https://pan.baidu.com/s/1--9DsFjTyQrTMwsMjY7FGg) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | [log]() | +| Model | Backbone | Detector | Input Size | Loss Type | AP | Speed | Download | Config | +|--------------------------|----------|----------|------------|------------|------------|-------|-----------|--------| +|[Fast Pose](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml) | ResNet50 | YOLOv3 | 256x192 | Heatmap | 41.7 | 4.37 iter/s | [Google](https://drive.google.com/file/d/1LbKM2TOxKdpIZoDxCo6ldmOf62pw6z8A/view?usp=sharing) [Baidu(code: y8a0)](https://pan.baidu.com/s/1z1xKIyyet5y-rr7ZQSNX_A) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | +|[Fast Pose](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | ResNet50 | YOLOv3 | 256x192 | Symmetric Integral | 44.1 | 16.50 iter/s | [Google](https://drive.google.com/file/d/1_10JYI3O-VbrAiONfL36UxLf9UXMoUYA/view?usp=sharing) [Baidu(code: 9e4z)](https://pan.baidu.com/s/1lakMQbqIWdNV_Khm8Hfcpw) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | +|[Fast Pose (DCN)](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Symmetric Integral | 46.2 | 16.58 iter/s | [Google](https://drive.google.com/file/d/1S49aDYGVjEJpx4MnFu7TFzzsbp7Si6h_/view?usp=sharing) [Baidu(code: 0yyf)](https://pan.baidu.com/s/1Xx2XJLrds80tp9QEQclR_A) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml) | +|[Fast Pose (DCN)](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Combined | 45.4 | 10.07 iter/s | [Google](https://drive.google.com/file/d/1jt-V1Zh-eYgX_-2mrBTV9Ip6z7JjApEC/view?usp=sharing) [Baidu(code: hln3)](https://pan.baidu.com/s/1yZNora5LhH-6eeTEw2S15w) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | +|[Fast Pose (DCN)](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Combined (10 hand weight) | 47.2 | 10.07 iter/s | [Google](https://drive.google.com/file/d/1nL2KYqxSnSZH8c7PRr_d9KEFxCEiyjAR/view?usp=sharing) [Baidu(code: jkyc)](https://pan.baidu.com/s/1RdldnKY93xsh0eWzz8nmgg) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | +|[Fast Pose (DUC)](../configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml) | ResNet152 | YOLOv3 | 256x192 | Symmetric Integral | 45.1 | 16.17 iter/s | [Google](https://drive.google.com/file/d/1zZotfE3WsBe1BxKimlK56wwJuK9E4EDs/view?usp=sharing) [Baidu(code: gaxj)](https://pan.baidu.com/s/1Tm_pV88kFkfqmw2Rzov8xg) | [cfg](../configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml) | + +For example, you can run with: +``` +python scripts/demo_inference.py --cfg configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml --checkpoint pretrained_models/halpe136_fast50_regression_256x192.pth --indir examples/demo/ --save_img +``` + +#### Notes 
+- All of the above models are trained only on the Halpe Full-body dataset. +- The APs are tested under Halpe's criterion, with flip test on. +- Combined loss means we use heatmap loss (MSE loss) on the body and foot keypoints and symmetric integral loss (L1 joint regression loss) on the face and hand keypoints (see the sketch below). +- There are two FastPose-DCN models with combined loss. The second one uses ten times the loss weight on hand keypoints, so it is more accurate on hand keypoints but less accurate on the other keypoints. +- The speed is tested on COCO val2017 on a single NVIDIA GeForce RTX 3090 GPU, with `batch_size=64` in each iteration and offline yolov3 human detection results. + +## [COCO WholeBody dataset](https://github.com/jin-s13/COCO-WholeBody) (133 keypoints) + +| Model | Backbone | Detector | Input Size | Loss Type | AP | Speed | Download | Config | +|--------------------------|----------|----------|------------|------------|------------|-------|-----------|--------| +|[Fast Pose](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | ResNet50 | YOLOv3 | 256x192 | Symmetric Integral | 55.4 | 17.42 iter/s | [Google](https://drive.google.com/file/d/1WQlwRw7KiKBI2Wyb-lvnQX29R29NbhLz/view?usp=sharing) [Baidu(code: nw03)](https://pan.baidu.com/s/1I1yXJXgKQEag5IUhc3xFGQ) | [cfg](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | +|[Fast Pose (DCN)](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Symmetric Integral | 57.7 | 16.70 iter/s | [Google](https://drive.google.com/file/d/10MgWM4rMORVaHNPyswal7RtrsehVV79X/view?usp=sharing) [Baidu(code: dq9k)](https://pan.baidu.com/s/1cz6lB-xIuwzBBFc1d7p67A) | [cfg](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml) | +|[Fast Pose](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml) | ResNet50 | YOLOv3 | 256x192 | Combined | 57.8 | 10.28 iter/s | [Google](https://drive.google.com/file/d/14wrc9q96bYqUc2efT8p8XzdTvLm-LwUT/view?usp=sharing) [Baidu(code: 7a56)](https://pan.baidu.com/s/1nML2nHn91-9n5B59axeYwA) | [cfg](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml) | +|[Fast Pose (DCN)](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Combined | 58.2 | 10.22 iter/s | [Google](https://drive.google.com/file/d/1aP0nYujw32H-VoJBVsXS-DsBBY-UwI8Y/view?usp=sharing) [Baidu(code: 99ee)](https://pan.baidu.com/s/1dbY6rELFy-ZTJptN5fsUqg) | [cfg](../configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | +|[Fast Pose (DUC)](../configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml) | ResNet152 | YOLOv3 | 256x192 | Symmetric Integral | 56.9 | 15.72 iter/s | [Google](https://drive.google.com/file/d/1ktBwkG1KL3_iFbPXAh5gua0zX92p-1KV/view?usp=sharing) [Baidu(code: jw3u)](https://pan.baidu.com/s/1TSI2JLk0o5lFPwGf216tNg) | [cfg](../configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml) | + +#### Notes +- All of the above models are trained only on the COCO WholeBody dataset. +- The APs are tested under COCO WholeBody's criterion, with flip test on. +- The speed is tested on COCO val2017 on a single NVIDIA GeForce RTX 3090 GPU, with `batch_size=64` in each iteration and offline yolov3 human detection results. 
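The 'Combined' loss described in the Halpe-136 notes above (configured through `LOSS_1`/`LOSS_2` in the new YAML files) splits the output channels into body/foot heatmaps supervised with MSE and face/hand channels supervised with an integral (L1 joint regression) loss, with the 100 : 0.01 weighting used in the updated `scripts/train.py`. The snippet below is a simplified, self-contained sketch of that split with a soft-argmax stand-in for the integral regression head; it is not the repository's actual criterion classes.
```
# Simplified sketch of the 'Combined' loss: MSE heatmap loss on the first
# 26 (body/foot) channels, L1 regression on the last 110 (face/hand) channels.
# The real training code builds two criteria from cfg.LOSS.LOSS_1 / LOSS_2;
# the soft-argmax below only stands in for the integral regression head.
import torch
import torch.nn.functional as F

def combined_loss(output, hm_labels, hm_masks, reg_labels, reg_masks,
                  face_hand_num=110, w_body=100.0, w_face_hand=0.01):
    # Body/foot channels: masked MSE against target heatmaps.
    body = output[:, :-face_hand_num]
    loss_body = 0.5 * F.mse_loss(body * hm_masks, hm_labels * hm_masks)

    # Face/hand channels: soft-argmax to normalized (x, y), then masked L1.
    fh = output[:, -face_hand_num:]
    b, k, h, w = fh.shape
    prob = fh.reshape(b, k, -1).softmax(dim=-1).reshape(b, k, h, w)
    xs = (prob.sum(dim=2) * torch.linspace(0, 1, w, device=fh.device)).sum(dim=-1)
    ys = (prob.sum(dim=3) * torch.linspace(0, 1, h, device=fh.device)).sum(dim=-1)
    coords = torch.stack((xs, ys), dim=-1)
    loss_face_hand = F.l1_loss(coords * reg_masks, reg_labels * reg_masks)

    return w_body * loss_body + w_face_hand * loss_face_hand
```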
+ +## Multi Domain Models **(Strongly Recommended)** +| Model | Backbone | Detector | Input Size | Loss Type | AP | Speed | Download | Config | #keypoints | +|--------------------------|----------|----------|------------|------------|------------|-------|-----------|--------|--------------| +|[Fast Pose](../configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | ResNet50 | YOLOv3 | 256x192 | Symmetric Integral | 50.1 | 16.28 iter/s | [Google](https://drive.google.com/file/d/1Bb3kPoFFt-M0Y3ceqNO8DTXi1iNDd4gI/view?usp=sharing) [Baidu(code: d0wi)](https://pan.baidu.com/s/1GaHzMHTqYze2rVn7u1sjVg) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml) | 136 | +|[Fast Pose (DCN)](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Combined (10 hand weight) | 49.8 | 10.35 iter/s | [Google](https://drive.google.com/file/d/1wX1Z2ZOoysgSNovlgiEtJKpbR8tUBWYR/view?usp=sharing) [Baidu(code: app1)](https://pan.baidu.com/s/1bIro0XfYj0FIVf84QzdDoQ) | [cfg](../configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | 136 | +|[Fast Pose (DCN)](../configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | ResNet50 - dcn | YOLOv3 | 256x192 | Combined | - | 13.88 iter/s | [Google](https://drive.google.com/file/d/14Qn9gxm-EVzqFi7v25Y5TqKIvrFLy_BR/view?usp=sharing) [Baidu(code: 6kwr)](https://pan.baidu.com/s/1GLNxN3gfekUVY0HZu41fJQ) | [cfg](../configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml) | 68 (no face) | +|[Fast Pose (DCN)](../configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml) | ResNet50 - dcn | - | 256x192 | Symmetric Integral | - | 30.20 iter/s | [Google](https://drive.google.com/file/d/1MntndimlUP5Hxef1UN9ZDMBVglfA606J/view?usp=sharing) [Baidu(code: nwxx)](https://pan.baidu.com/s/1OR-uH25MFQ7kY8Gt_aJfbw) | [cfg](../configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml) | 21 (single hand) | -You can run with: +For the most accurate whole-body pose estimation, you can run with: +``` +python scripts/demo_inference.py --cfg configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml --checkpoint pretrained_models/multi_domain_fast50_dcn_combined_256x192.pth --indir examples/demo/ --save_img +``` +or, you can run with (this version is a little faster and more accurate on body keypoints, but its performance on hand keypoints is worse): ``` -python scripts/demo_inference.py --cfg configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml --checkpoint pretrained_models/halpe136_fast_res50_256x192.pth --indir examples/demo/ --save_img +python scripts/demo_inference.py --cfg configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml --checkpoint pretrained_models/multi_domain_fast50_regression_256x192.pth --indir examples/demo/ --save_img ``` #### Notes -- More models coming soon! +- These models are strongly recommended because they are more accurate and flexible. +- These models are trained with multi-domain knowledge distillation (MDKD, see our [paper]() for more details). +- The APs are tested under Halpe's criterion, with flip test on. +- If you want to use the single-hand model, you should give the rough bounding box of **a single hand** instead of that of a whole person. +- The speed is tested on COCO val2017 on a single NVIDIA GeForce RTX 3090 GPU, with `batch_size=64` in each iteration and offline yolov3 human detection results. 
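The 136-keypoint models above write each detection's `keypoints` as a flat `[x, y, score, ...]` list (this is how the demo and validation scripts serialize results). The sketch below shows one way a consumer might split such an entry back into body/foot, face and hand groups using the same 26 + 110 layout the scripts rely on (`face_hand_num = 110`); the input filename is assumed to be the demo's default output name, and the exact hand ordering follows the Halpe-FullBody keypoint definition.
```
# Split a 136-keypoint AlphaPose result entry into body/foot, face and hand
# groups. Assumes the demo's default output file 'alphapose-results.json'
# and the Halpe-FullBody ordering: 26 body/foot, 68 face, 2 x 21 hands.
import json
import numpy as np

with open('alphapose-results.json') as f:
    results = json.load(f)

for person in results:
    kpts = np.asarray(person['keypoints'], dtype=np.float32).reshape(-1, 3)  # (136, 3): x, y, score
    body_foot = kpts[:26]   # body and foot keypoints
    face = kpts[26:94]      # 68 face landmarks
    hands = kpts[94:136]    # two 21-point hands (left/right per the Halpe definition)
```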
diff --git a/scripts/demo_api.py b/scripts/demo_api.py index a0e879f7..d608d717 100644 --- a/scripts/demo_api.py +++ b/scripts/demo_api.py @@ -201,12 +201,22 @@ def update(self): self.eval_joints = [*range(0,136)] elif hm_data.size()[1] == 26: self.eval_joints = [*range(0,26)] + elif hm_data.size()[1] == 133: + self.eval_joints = [*range(0,133)] pose_coords = [] pose_scores = [] for i in range(hm_data.shape[0]): bbox = cropped_boxes[i].tolist() - pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) + if isinstance(self.heatmap_to_coord, list): + pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[0]( + hm_data[i][self.eval_joints[:-110]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[1]( + hm_data[i][self.eval_joints[-110:]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0)) pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0)) preds_img = torch.cat(pose_coords) diff --git a/scripts/demo_inference.py b/scripts/demo_inference.py index 9e8fa9e5..a4958cad 100644 --- a/scripts/demo_inference.py +++ b/scripts/demo_inference.py @@ -17,6 +17,7 @@ from alphapose.models import builder from alphapose.utils.config import update_config from alphapose.utils.detector import DetectionLoader +from alphapose.utils.file_detector import FileDetectionLoader from alphapose.utils.transforms import flip, flip_heatmap from alphapose.utils.vis import getTime from alphapose.utils.webcam_detector import WebCamDetectionLoader @@ -56,7 +57,7 @@ help='min box area to filter out') parser.add_argument('--detbatch', type=int, default=5, help='detection batch size PER GPU') -parser.add_argument('--posebatch', type=int, default=80, +parser.add_argument('--posebatch', type=int, default=64, help='pose estimation maximum batch size PER GPU') parser.add_argument('--eval', dest='eval', default=False, action='store_true', help='save the result json as coco format, using image index(int) instead of image name(str)') diff --git a/scripts/train.py b/scripts/train.py index 37dcfd6d..d0dae2e6 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -1,6 +1,7 @@ """Script for multi-gpu training.""" import json import os +import sys import numpy as np import torch @@ -26,6 +27,9 @@ def train(opt, train_loader, m, criterion, optimizer, writer): loss_logger = DataLogger() acc_logger = DataLogger() + + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + m.train() norm_type = cfg.LOSS.get('NORM_TYPE', None) @@ -36,14 +40,46 @@ def train(opt, train_loader, m, criterion, optimizer, writer): inps = [inp.cuda().requires_grad_() for inp in inps] else: inps = inps.cuda().requires_grad_() - labels = labels.cuda() - label_masks = label_masks.cuda() + if isinstance(labels, list): + labels = [label.cuda() for label in labels] + label_masks = [label_mask.cuda() for label_mask in label_masks] + else: + labels = labels.cuda() + label_masks = label_masks.cuda() output = m(inps) if cfg.LOSS.get('TYPE') == 'MSELoss': loss = 0.5 * criterion(output.mul(label_masks), labels.mul(label_masks)) acc = calc_accuracy(output.mul(label_masks), 
labels.mul(label_masks)) + elif cfg.LOSS.get('TYPE') == 'Combined': + if output.size()[1] == 68: + face_hand_num = 42 + else: + face_hand_num = 110 + + output_body_foot = output[:, :-face_hand_num, :, :] + output_face_hand = output[:, -face_hand_num:, :, :] + num_body_foot = output_body_foot.shape[1] + num_face_hand = output_face_hand.shape[1] + + label_masks_body_foot = label_masks[0] + label_masks_face_hand = label_masks[1] + + labels_body_foot = labels[0] + labels_face_hand = labels[1] + + loss_body_foot = 0.5 * criterion[0](output_body_foot.mul(label_masks_body_foot), labels_body_foot.mul(label_masks_body_foot)) + acc_body_foot = calc_accuracy(output_body_foot.mul(label_masks_body_foot), labels_body_foot.mul(label_masks_body_foot)) + + loss_face_hand = criterion[1](output_face_hand, labels_face_hand, label_masks_face_hand) + acc_face_hand = calc_integral_accuracy(output_face_hand, labels_face_hand, label_masks_face_hand, output_3d=False, norm_type=norm_type) + + loss_body_foot *= 100 + loss_face_hand *= 0.01 + + loss = loss_body_foot + loss_face_hand + acc = acc_body_foot * num_body_foot / (num_body_foot + num_face_hand) + acc_face_hand * num_face_hand / (num_body_foot + num_face_hand) else: loss = criterion(output, labels, label_masks) acc = calc_integral_accuracy(output, labels, label_masks, output_3d=False, norm_type=norm_type) @@ -92,6 +128,9 @@ def validate(m, opt, heatmap_to_coord, batch_size=20): norm_type = cfg.LOSS.get('NORM_TYPE', None) hm_size = cfg.DATA_PRESET.HEATMAP_SIZE + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + + halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS == 136) for inps, crop_bboxes, bboxes, img_ids, scores, imghts, imgwds in tqdm(det_loader, dynamic_ncols=True): if isinstance(inps, list): @@ -104,10 +143,23 @@ def validate(m, opt, heatmap_to_coord, batch_size=20): assert pred.dim() == 4 pred = pred[:, eval_joints, :, :] + if output.size()[1] == 68: + face_hand_num = 42 + else: + face_hand_num = 110 + for i in range(output.shape[0]): bbox = crop_bboxes[i].tolist() - pose_coords, pose_scores = heatmap_to_coord( - pred[i][det_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type) + if combined_loss: + pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0]( + pred[i][det_dataset.EVAL_JOINTS[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1]( + pred[i][det_dataset.EVAL_JOINTS[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_scores = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coords, pose_scores = heatmap_to_coord( + pred[i][det_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type) keypoints = np.concatenate((pose_coords, pose_scores), axis=1) keypoints = keypoints.reshape(-1).tolist() @@ -115,15 +167,17 @@ def validate(m, opt, heatmap_to_coord, batch_size=20): data = dict() data['bbox'] = bboxes[i, 0].tolist() data['image_id'] = int(img_ids[i]) - data['score'] = float(scores[i] + np.mean(pose_scores) + np.max(pose_scores)) + data['score'] = float(scores[i] + np.mean(pose_scores) + 1.25 * np.max(pose_scores)) data['category_id'] = 1 data['keypoints'] = keypoints kpt_json.append(data) + sysout = sys.stdout with open(os.path.join(opt.work_dir, 'test_kpt.json'), 'w') as fid: json.dump(kpt_json, fid) - res = evaluate_mAP(os.path.join(opt.work_dir, 'test_kpt.json'), 
ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN)) + res = evaluate_mAP(os.path.join(opt.work_dir, 'test_kpt.json'), ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN), halpe=halpe) + sys.stdout = sysout return res @@ -138,6 +192,9 @@ def validate_gt(m, opt, cfg, heatmap_to_coord, batch_size=20): norm_type = cfg.LOSS.get('NORM_TYPE', None) hm_size = cfg.DATA_PRESET.HEATMAP_SIZE + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + + halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS == 136) for inps, labels, label_masks, img_ids, bboxes in tqdm(gt_val_loader, dynamic_ncols=True): if isinstance(inps, list): @@ -150,10 +207,23 @@ def validate_gt(m, opt, cfg, heatmap_to_coord, batch_size=20): assert pred.dim() == 4 pred = pred[:, eval_joints, :, :] + if output.size()[1] == 68: + face_hand_num = 42 + else: + face_hand_num = 110 + for i in range(output.shape[0]): bbox = bboxes[i].tolist() - pose_coords, pose_scores = heatmap_to_coord( - pred[i][gt_val_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type) + if combined_loss: + pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0]( + pred[i][gt_val_dataset.EVAL_JOINTS[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1]( + pred[i][gt_val_dataset.EVAL_JOINTS[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type) + pose_coords = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_scores = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coords, pose_scores = heatmap_to_coord( + pred[i][gt_val_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type) keypoints = np.concatenate((pose_coords, pose_scores), axis=1) keypoints = keypoints.reshape(-1).tolist() @@ -161,15 +231,17 @@ def validate_gt(m, opt, cfg, heatmap_to_coord, batch_size=20): data = dict() data['bbox'] = bboxes[i].tolist() data['image_id'] = int(img_ids[i]) - data['score'] = float(np.mean(pose_scores) + np.max(pose_scores)) + data['score'] = float(np.mean(pose_scores) + 1.25 * np.max(pose_scores)) data['category_id'] = 1 data['keypoints'] = keypoints kpt_json.append(data) + sysout = sys.stdout with open(os.path.join(opt.work_dir, 'test_gt_kpt.json'), 'w') as fid: json.dump(kpt_json, fid) - res = evaluate_mAP(os.path.join(opt.work_dir, 'test_gt_kpt.json'), ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN)) + res = evaluate_mAP(os.path.join(opt.work_dir, 'test_gt_kpt.json'), ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN), halpe=halpe) + sys.stdout = sysout return res @@ -184,7 +256,13 @@ def main(): m = preset_model(cfg) m = nn.DataParallel(m).cuda() - criterion = builder.build_loss(cfg.LOSS).cuda() + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + if combined_loss: + criterion1 = builder.build_loss(cfg.LOSS.LOSS_1).cuda() + criterion2 = builder.build_loss(cfg.LOSS.LOSS_2).cuda() + criterion = [criterion1, criterion2] + else: + criterion = builder.build_loss(cfg.LOSS).cuda() if cfg.TRAIN.OPTIMIZER == 'adam': optimizer = torch.optim.Adam(m.parameters(), lr=cfg.TRAIN.LR) diff --git a/scripts/validate.py b/scripts/validate.py index a5098337..606fbe94 100644 --- a/scripts/validate.py +++ b/scripts/validate.py @@ -5,13 +5,13 @@ import numpy as np import torch from tqdm import tqdm +import sys from alphapose.models import builder 
from alphapose.utils.config import update_config from alphapose.utils.metrics import evaluate_mAP from alphapose.utils.transforms import (flip, flip_heatmap, get_func_heatmap_to_coord) -from alphapose.utils.pPose_nms import oks_pose_nms parser = argparse.ArgumentParser(description='AlphaPose Validate') @@ -25,9 +25,11 @@ type=str) parser.add_argument('--gpus', help='gpus', + default='0', type=str) parser.add_argument('--batch', help='validation batch size', + default=32, type=int) parser.add_argument('--flip-test', default=False, @@ -36,6 +38,16 @@ action='store_true') parser.add_argument('--detector', dest='detector', help='detector name', default="yolo") +parser.add_argument('--oks-nms', + default=False, + dest='oks_nms', + help='use oks nms', + action='store_true') +parser.add_argument('--ppose-nms', + default=False, + dest='ppose_nms', + help='use pPose nms, recommended', + action='store_true') opt = parser.parse_args() cfg = update_config(opt.cfg) @@ -56,6 +68,9 @@ def validate(m, heatmap_to_coord, batch_size=20): norm_type = cfg.LOSS.get('NORM_TYPE', None) hm_size = cfg.DATA_PRESET.HEATMAP_SIZE + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + + halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS == 136) for inps, crop_bboxes, bboxes, img_ids, scores, imghts, imgwds in tqdm(det_loader, dynamic_ncols=True): if isinstance(inps, list): @@ -72,15 +87,32 @@ def validate(m, heatmap_to_coord, batch_size=20): pred_flip = output_flip[:, eval_joints, :, :] else: output_flip = None + pred_flip = None pred = output assert pred.dim() == 4 pred = pred[:, eval_joints, :, :] + if output.size()[1] == 68: + face_hand_num = 42 + else: + face_hand_num = 110 + for i in range(output.shape[0]): bbox = crop_bboxes[i].tolist() - pose_coords, pose_scores = heatmap_to_coord( - pred[i], bbox, hms_flip=pred_flip[i], hm_shape=hm_size, norm_type=norm_type) + if combined_loss: + pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0]( + pred[i][det_dataset.EVAL_JOINTS[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type, + hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS[:-face_hand_num]] if pred_flip is not None else None) + pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1]( + pred[i][det_dataset.EVAL_JOINTS[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type, + hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS[-face_hand_num:]] if pred_flip is not None else None) + pose_coords = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_scores = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coords, pose_scores = heatmap_to_coord( + pred[i][det_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type, + hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS] if pred_flip is not None else None) keypoints = np.concatenate((pose_coords, pose_scores), axis=1) keypoints = keypoints.reshape(-1).tolist() @@ -89,19 +121,53 @@ def validate(m, heatmap_to_coord, batch_size=20): data['bbox'] = bboxes[i, 0].tolist() data['image_id'] = int(img_ids[i]) data['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - # data['score'] = float(scores[i] + np.mean(pose_scores) + np.max(pose_scores)) - data['score'] = float(scores[i]) + data['score'] = float(scores[i] + np.mean(pose_scores) + 1.25 * np.max(pose_scores)) + # data['score'] = float(scores[i]) data['category_id'] = 1 data['keypoints'] = keypoints kpt_json.append(data) - kpt_json = oks_pose_nms(kpt_json) - - with 
open('./exp/json/validate_rcnn_kpt.json', 'w') as fid: - json.dump(kpt_json, fid) - res = evaluate_mAP('./exp/json/validate_rcnn_kpt.json', ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN)) - return res['AP'] + if opt.ppose_nms: + from alphapose.utils.pPose_nms import ppose_nms_validate_preprocess, pose_nms, write_json + final_result = [] + tmp_data = ppose_nms_validate_preprocess(kpt_json) + for key in tmp_data: + boxes, scores, ids, preds_img, preds_scores = tmp_data[key] + boxes, scores, ids, preds_img, preds_scores, pick_ids = \ + pose_nms(boxes, scores, ids, preds_img, preds_scores, 0, cfg.LOSS.get('TYPE') == 'MSELoss') + + _result = [] + for k in range(len(scores)): + _result.append( + { + 'keypoints':preds_img[k], + 'kp_score':preds_scores[k], + 'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]), + 'idx':ids[k], + 'box':[boxes[k][0], boxes[k][1], boxes[k][2]-boxes[k][0],boxes[k][3]-boxes[k][1]] + } + ) + im_name = str(key).zfill(12) + '.jpg' + result = { + 'imgname': im_name, + 'result': _result + } + final_result.append(result) + + write_json(final_result, './exp/json/', form='coco', for_eval=True, outputfile='validate_rcnn_kpt.json') + else: + if opt.oks_nms: + from alphapose.utils.pPose_nms import oks_pose_nms + kpt_json = oks_pose_nms(kpt_json) + + with open('./exp/json/validate_rcnn_kpt.json', 'w') as fid: + json.dump(kpt_json, fid) + + sysout = sys.stdout + res = evaluate_mAP('./exp/json/validate_rcnn_kpt.json', ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.TEST.ROOT, cfg.DATASET.TEST.ANN), halpe=halpe) + sys.stdout = sysout + return res def validate_gt(m, cfg, heatmap_to_coord, batch_size=20): @@ -115,6 +181,9 @@ def validate_gt(m, cfg, heatmap_to_coord, batch_size=20): norm_type = cfg.LOSS.get('NORM_TYPE', None) hm_size = cfg.DATA_PRESET.HEATMAP_SIZE + combined_loss = (cfg.LOSS.get('TYPE') == 'Combined') + + halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS == 136) for inps, labels, label_masks, img_ids, bboxes in tqdm(gt_val_loader, dynamic_ncols=True): if isinstance(inps, list): @@ -131,15 +200,32 @@ def validate_gt(m, cfg, heatmap_to_coord, batch_size=20): pred_flip = output_flip[:, eval_joints, :, :] else: output_flip = None + pred_flip = None pred = output assert pred.dim() == 4 pred = pred[:, eval_joints, :, :] + if output.size()[1] == 68: + face_hand_num = 42 + else: + face_hand_num = 110 + for i in range(output.shape[0]): bbox = bboxes[i].tolist() - pose_coords, pose_scores = heatmap_to_coord( - pred[i], bbox, hms_flip=pred_flip[i], hm_shape=hm_size, norm_type=norm_type) + if combined_loss: + pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0]( + pred[i][gt_val_dataset.EVAL_JOINTS[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type, + hms_flip=pred_flip[i][gt_val_dataset.EVAL_JOINTS[:-face_hand_num]] if pred_flip is not None else None) + pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1]( + pred[i][gt_val_dataset.EVAL_JOINTS[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type, + hms_flip=pred_flip[i][gt_val_dataset.EVAL_JOINTS[-face_hand_num:]] if pred_flip is not None else None) + pose_coords = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0) + pose_scores = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0) + else: + pose_coords, pose_scores = heatmap_to_coord( + pred[i][gt_val_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type, + 
hms_flip=pred_flip[i][gt_val_dataset.EVAL_JOINTS] if pred_flip is not None else None) keypoints = np.concatenate((pose_coords, pose_scores), axis=1) keypoints = keypoints.reshape(-1).tolist() @@ -147,16 +233,18 @@ def validate_gt(m, cfg, heatmap_to_coord, batch_size=20): data = dict() data['bbox'] = bboxes[i].tolist() data['image_id'] = int(img_ids[i]) - data['score'] = float(np.mean(pose_scores) + np.max(pose_scores)) + data['score'] = float(np.mean(pose_scores) + 1.25 * np.max(pose_scores)) data['category_id'] = 1 data['keypoints'] = keypoints kpt_json.append(data) + sysout = sys.stdout with open('./exp/json/validate_gt_kpt.json', 'w') as fid: json.dump(kpt_json, fid) - res = evaluate_mAP('./exp/json/validate_gt_kpt.json', ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN)) - return res['AP'] + res = evaluate_mAP('./exp/json/validate_gt_kpt.json', ann_type='keypoints', ann_file=os.path.join(cfg.DATASET.VAL.ROOT, cfg.DATASET.VAL.ANN), halpe=halpe) + sys.stdout = sysout + return res if __name__ == "__main__": @@ -171,4 +259,4 @@ def validate_gt(m, cfg, heatmap_to_coord, batch_size=20): with torch.no_grad(): gt_AP = validate_gt(m, cfg, heatmap_to_coord, opt.batch) detbox_AP = validate(m, heatmap_to_coord, opt.batch) - print('##### gt box: {} mAP | det box: {} mAP #####'.format(gt_AP, detbox_AP)) + print('##### gt box: {} mAP | det box: {} mAP #####'.format(gt_AP, detbox_AP)) \ No newline at end of file diff --git a/setup.py b/setup.py index d1e397d3..8a605038 100644 --- a/setup.py +++ b/setup.py @@ -163,13 +163,13 @@ def get_install_requires(): 'six', 'terminaltables', 'scipy==1.1.0', 'opencv-python', 'matplotlib', 'visdom', 'tqdm', 'tensorboardx', 'easydict', - 'pyyaml', + 'pyyaml', 'halpecocotools', 'torch>=1.1.0', 'torchvision>=0.3.0', 'munkres', 'timm==0.1.20', 'natsort' ] # official pycocotools doesn't support Windows, we will install it by third-party git repository later if platform.system() != 'Windows': - install_requires.append('pycocotools==2.0.0') + install_requires.append('pycocotools') return install_requires
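`setup.py` now depends on `halpecocotools`, and the updated training and validation scripts call `evaluate_mAP(..., halpe=...)` whenever the model predicts 133 or 136 joints. The sketch below shows how such a dispatch could look, assuming `halpecocotools` mirrors the `pycocotools` `COCO`/`COCOeval` interface (an assumption); the actual implementation lives in `alphapose/utils/metrics.py`, and the function name here is illustrative only.
```
# Hedged sketch of an evaluate_mAP-style helper: pick the Halpe COCO API for
# 133/136-joint models, the stock pycocotools otherwise. Assumes halpecocotools
# exposes the same COCO/COCOeval interface as pycocotools.
def eval_keypoint_map(res_file, ann_file, halpe=False):
    if halpe:
        from halpecocotools.coco import COCO
        from halpecocotools.cocoeval import COCOeval
    else:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

    coco_gt = COCO(ann_file)
    coco_dt = coco_gt.loadRes(res_file)
    coco_eval = COCOeval(coco_gt, coco_dt, 'keypoints')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats[0]  # AP at OKS=0.50:0.95
```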