-
Notifications
You must be signed in to change notification settings - Fork 0
/
mosse.py
117 lines (87 loc) · 3.56 KB
/
mosse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from typing import List
import cv2
import numpy as np
from utils import (
BoundingBox,
MosseResult,
gauss_reponse,
normalize_range,
preprocess,
random_affine_transform,
)
class Mosse:
def __init__(self, sigma: float = 10, learning_rate: float = 0.125):
self.sigma = sigma
self.learning_rate = learning_rate
self.first_frame = True
self.clipped = False
def init(
self,
frame: np.ndarray,
bbox: BoundingBox,
pretrain_iters: int = 128,
) -> List[MosseResult]:
"""Pretrain the filter on the first frame."""
self.bbox = bbox
self.search_win_h = self.bbox.h
self.search_win_w = self.bbox.w
results = []
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
response = gauss_reponse(*frame.shape, bbox.x, bbox.y)
g = response[self.bbox.top : self.bbox.bottom, self.bbox.left : self.bbox.right]
f = frame[self.bbox.top : self.bbox.bottom, self.bbox.left : self.bbox.right]
G = np.fft.fft2(g)
f = preprocess(f)
F = np.fft.fft2(f)
self.A_i = G * np.conj(F)
self.B_i = F * np.conj(F)
results.append(MosseResult(frame, self.bbox, response, self.A_i, self.B_i, f))
for _ in range(pretrain_iters):
trans_frame, bbox = random_affine_transform(frame, self.bbox)
response = gauss_reponse(*trans_frame.shape, bbox.object_x, bbox.object_y)
g = response[bbox.top : bbox.bottom, bbox.left : bbox.right]
f = trans_frame[bbox.top : bbox.bottom, bbox.left : bbox.right]
G = np.fft.fft2(g)
f = preprocess(f)
F = np.fft.fft2(f)
self.A_i += G * np.conj(F)
self.B_i += F * np.conj(F)
results.append(
MosseResult(trans_frame, self.bbox, response, self.A_i, self.B_i, f)
)
self.A_i *= self.learning_rate
self.B_i *= self.learning_rate
self.H_i = self.A_i / self.B_i
results.append(MosseResult(frame, self.bbox, response, self.A_i, self.B_i, f))
return results
def update(self, frame: np.ndarray) -> BoundingBox:
"""Update the tracker with the new frame."""
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
f = frame[self.bbox.top : self.bbox.bottom, self.bbox.left : self.bbox.right]
f = preprocess(f)
if f.shape != (self.search_win_h, self.search_win_w):
f = cv2.resize(f, (self.search_win_w, self.search_win_h))
self.clipped = False
G = self.H_i * np.fft.fft2(f)
g = normalize_range(np.fft.ifft2(G))
max_pos = np.where(g == g.max())
self.bbox.x = max_pos[1].item() + self.bbox.left
self.bbox.y = max_pos[0].item() + self.bbox.top
self.clipped = self.bbox.clip(
*frame.shape, self.search_win_h, self.search_win_w
)
f = frame[self.bbox.top : self.bbox.bottom, self.bbox.left : self.bbox.right]
f = preprocess(f)
if f.shape != (self.search_win_h, self.search_win_w):
f = cv2.resize(f, (self.search_win_w, self.search_win_h))
G = self.H_i * np.fft.fft2(f)
self.A_i = (
self.learning_rate * (G * np.conj(np.fft.fft2(f)))
+ (1 - self.learning_rate) * self.A_i
)
self.B_i = (
self.learning_rate * (np.fft.fft2(f) * np.conj(np.fft.fft2(f)))
+ (1 - self.learning_rate) * self.B_i
)
self.H_i = self.A_i / self.B_i
return self.bbox