-
Notifications
You must be signed in to change notification settings - Fork 116
/
Copy pathvid_fgfa.py
83 lines (68 loc) · 3.23 KB
/
vid_fgfa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from PIL import Image
import sys
import numpy as np
from .vid import VIDDataset
from mega_core.config import cfg
class VIDFGFADataset(VIDDataset):
    """VID dataset variant that pairs each current frame with reference frames.

    Training samples carry ``cfg.MODEL.VID.FGFA.REF_NUM`` reference images;
    test samples carry a single reference image together with the extra
    metadata stored in the returned ``images`` dict.
    """

    def __init__(self, image_set, data_dir, img_dir, anno_path, img_index, transforms, is_train=True):
        """Initialise the parent dataset and, when testing, index frame-0 entries.

        All arguments are forwarded unchanged to ``VIDDataset.__init__``.
        When ``self.is_train`` is false, ``self.start_index`` is set to the
        positions in ``self.image_set_index`` whose last ``/``-separated
        component parses to the integer 0.
        """
        super(VIDFGFADataset, self).__init__(
            image_set, data_dir, img_dir, anno_path, img_index, transforms, is_train=is_train
        )
        if self.is_train:
            return

        self.start_index = [
            position
            for position, entry in enumerate(self.image_set_index)
            if int(entry.split("/")[-1]) == 0
        ]

    def _get_train(self, idx):
        """Build one training sample for index ``idx``.

        Returns:
            A tuple ``(images, target, idx)`` where ``images["cur"]`` is the
            current frame and ``images["ref"]`` is the list of reference
            frames. ``target`` is the result of ``get_groundtruth`` after
            ``clip_to_image(remove_empty=True)``; both the images and the
            target are passed through ``self.transforms`` when it is set.
        """
        current_name = self.image_set_index[idx]
        img = Image.open(self._img_dir % current_name).convert("RGB")

        fgfa = cfg.MODEL.VID.FGFA
        if hasattr(self, "pattern"):
            # A "pattern" attribute marks a video sample: draw distinct offsets
            # in [MIN_OFFSET, MAX_OFFSET] and clamp each resulting frame id to
            # the valid range [0, frame_seg_len - 1].
            offsets = np.random.choice(
                fgfa.MAX_OFFSET - fgfa.MIN_OFFSET + 1, fgfa.REF_NUM, replace=False
            ) + fgfa.MIN_OFFSET
            img_refs = []
            for offset in offsets:
                shifted = max(self.frame_seg_id[idx] + offset, 0)
                ref_id = min(shifted, self.frame_seg_len[idx] - 1)
                ref_name = self.pattern[idx] % ref_id
                img_refs.append(Image.open(self._img_dir % ref_name).convert("RGB"))
        else:
            # No pattern available: reuse copies of the current image as references.
            img_refs = [img.copy() for _ in range(fgfa.REF_NUM)]

        target = self.get_groundtruth(idx).clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
            # Reference frames are transformed without a target; the second
            # return value is discarded.
            for position, ref in enumerate(img_refs):
                img_refs[position], _ = self.transforms(ref, None)

        images = {"cur": img, "ref": img_refs}
        return images, target, idx

    def _get_test(self, idx):
        """Build one test sample for index ``idx``.

        Returns:
            A tuple ``(images, target, idx)``. Besides ``"cur"`` and ``"ref"``,
            ``images`` carries ``"frame_category"`` (0 for a frame whose id is
            0, otherwise 1), ``"seg_len"``, ``"pattern"``, ``"img_dir"`` and
            ``"transforms"`` copied from this dataset.
        """
        current_name = self.image_set_index[idx]
        img = Image.open(self._img_dir % current_name).convert("RGB")

        # Frame category: 0 for the start frame, 1 for every other frame.
        frame_id = int(current_name.split("/")[-1])
        frame_category = 0 if frame_id == 0 else 1

        # Load one further frame of the queue; here it is the one MAX_OFFSET
        # ahead, capped at the last frame of the segment.
        ref_id = min(self.frame_seg_len[idx] - 1, frame_id + cfg.MODEL.VID.FGFA.MAX_OFFSET)
        ref_name = self.pattern[idx] % ref_id
        img_refs = [Image.open(self._img_dir % ref_name).convert("RGB")]

        target = self.get_groundtruth(idx).clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
            for position, ref in enumerate(img_refs):
                img_refs[position], _ = self.transforms(ref, None)

        images = {
            "cur": img,
            "ref": img_refs,
            "frame_category": frame_category,
            "seg_len": self.frame_seg_len[idx],
            "pattern": self.pattern[idx],
            "img_dir": self._img_dir,
            "transforms": self.transforms,
        }
        return images, target, idx