-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemplateMatchingCUDA.cu
163 lines (145 loc) · 4.94 KB
/
templateMatchingCUDA.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include"templateMatchingCUDA.cuh"
#include"initANDcheck.h"
using namespace std;
using namespace chrono;
// kernel_1: per-pixel maximum over the band axis.
//
// For every (i, j) with i < 200 and j < 200, writes
//     image2D_XY_gpu[i * 200 + j] = max over b in [0, 50) of
//                                   ObjRecon_gpu[b * 200 * 200 + i * 200 + j]
//
// Launch layout: 2D; i is taken from the x dimension and j from the y
// dimension. Threads whose (i, j) fall outside 200 x 200 do nothing.
//
// Parameters:
//   ObjRecon_gpu   - input, read at offsets [0, 50 * 200 * 200)
//   height, width  - not used by this kernel; the extents are hard-coded
//   image2D_XY_gpu - output, written at offsets [0, 200 * 200)
__global__ void kernel_1(float *ObjRecon_gpu, int height, int width, float *image2D_XY_gpu)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x; // row cycle
    const int j = blockDim.y * blockIdx.y + threadIdx.y; // col cycle
    if (i < 200 && j < 200)
    {
        const int pixel = i * 200 + j;

        // Keep the running maximum in a local variable and store it once,
        // rather than re-reading and re-writing the output element on every
        // band iteration.
        float best = ObjRecon_gpu[pixel]; // band 0 seeds the maximum
        for (int b = 1; b < 50; b++) // remaining bands
        {
            const float candidate = ObjRecon_gpu[b * 200 * 200 + pixel];
            if (best < candidate)
            {
                best = candidate;
            }
        }
        image2D_XY_gpu[pixel] = best;
    }
}
// kernel_2: thresholds a flat float array against a scalar.
//
// For every index below `total`, writes 1 to img2DBW_XY_gpu when the matching
// element of image2D_XY_gpu is strictly greater than image2D_XY_mean, and 0
// otherwise. Launch layout: 1D (only the x dimension is used).
//
// Parameters:
//   image2D_XY_gpu  - input values, read at [0, total)
//   total           - number of elements to process
//   image2D_XY_mean - threshold; the comparison is carried out in double
//   img2DBW_XY_gpu  - output mask (stored as float), written at [0, total)
__global__ void kernel_2(float *image2D_XY_gpu, int total, double image2D_XY_mean, float *img2DBW_XY_gpu)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= total)
    {
        return; // surplus thread at the tail of the grid
    }

    // The float sample is promoted to double for the comparison.
    const bool aboveThreshold = image2D_XY_gpu[idx] > image2D_XY_mean;
    img2DBW_XY_gpu[idx] = aboveThreshold ? 1.0f : 0.0f;
}
// kernel_3: mean squared error between each template and one 200 x 200 image.
//
// Thread i (for i < rotationAngleXY_size) walks the 200 * 200 elements of
// template i, accumulates the squared differences against img2DBW_XY_gpu in a
// double, and stores the sum divided by 200 * 200 in err_XY_gpu[i].
// Launch layout: 1D, one thread per template.
//
// Parameters:
//   template_roXY_gpu     - templates, template i starts at offset i * 200 * 200
//   img2DBW_XY_gpu        - reference image, 200 * 200 elements
//   rotationAngleXY_size  - number of templates to evaluate
//   err_XY_gpu            - output, one double per template
__global__ void kernel_3(float *template_roXY_gpu, float *img2DBW_XY_gpu, int rotationAngleXY_size, double *err_XY_gpu)
{
    const int angle = blockIdx.x * blockDim.x + threadIdx.x;
    if (angle >= rotationAngleXY_size)
    {
        return;
    }

    const float *tmpl = template_roXY_gpu + angle * 200 * 200;

    // Rows and columns are visited in row-major order, so a single flat
    // index covers the same elements in the same sequence.
    double squaredErrorSum = 0;
    for (int p = 0; p < 200 * 200; p++)
    {
        const float diff = tmpl[p] - img2DBW_XY_gpu[p];
        squaredErrorSum += diff * diff; // product formed in float, summed in double
    }
    err_XY_gpu[angle] = squaredErrorSum / (200 * 200);
}
// kernel_4: per-row maximum of every band of a 50 x 200 x 200 volume.
//
// For every (i, j) with i < 50 and j < 200, writes
//     image2D_YZ_gpu[i * 200 + j] = max over k in [0, 200) of
//                                   imageRotated3D_gpu[i * 200 * 200 + j * 200 + k]
// The maximum is seeded with -FLT_MAX, so that is the result if no element of
// the row compares greater than it.
//
// Launch layout: 2D; i (band) is taken from the x dimension and j (row) from
// the y dimension. Threads outside 50 x 200 do nothing.
//
// Parameters:
//   imageRotated3D_gpu - input, read at offsets [0, 50 * 200 * 200)
//   image2D_YZ_gpu     - output, written at offsets [0, 50 * 200)
__global__ void kernel_4(float *imageRotated3D_gpu, float *image2D_YZ_gpu)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x; // band cycle
    const int j = blockDim.y * blockIdx.y + threadIdx.y; // row cycle
    if (i < 50 && j < 200)
    {
        const float *row = imageRotated3D_gpu + i * 200 * 200 + j * 200;

        // Keep the running maximum in a local variable and store it once,
        // rather than re-reading and re-writing the output element on every
        // column iteration.
        float best = -FLT_MAX;
        for (int k = 0; k < 200; k++) // col cycle: find the maximum value of a row
        {
            const float candidate = row[k];
            if (best < candidate)
            {
                best = candidate;
            }
        }
        image2D_YZ_gpu[i * 200 + j] = best;
    }
}
// kernel_5: thresholds the first 200 * 50 elements of a float array.
//
// For every index below 200 * 50, writes 1 to img2DBW_YZ_gpu when the matching
// element of image2D_YZ_gpu is strictly greater than image2D_YZ_mean, and 0
// otherwise. Launch layout: 1D (only the x dimension is used).
//
// Parameters:
//   image2D_YZ_gpu  - input values, read at [0, 200 * 50)
//   image2D_YZ_mean - threshold; the comparison is carried out in double
//   img2DBW_YZ_gpu  - output mask (stored as float), written at [0, 200 * 50)
__global__ void kernel_5(float *image2D_YZ_gpu, double image2D_YZ_mean, float *img2DBW_YZ_gpu)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= 200 * 50)
    {
        return; // surplus thread at the tail of the grid
    }

    // The float sample is promoted to double for the comparison.
    const bool aboveThreshold = image2D_YZ_gpu[idx] > image2D_YZ_mean;
    img2DBW_YZ_gpu[idx] = aboveThreshold ? 1.0f : 0.0f;
}
// kernel_6: mean squared error between each 200 x 50 template and one image.
//
// Thread i (for i < rotationAngleYZ_size) compares template element
// (row j, col k), stored at i * 200 * 50 + j * 50 + k, with the image element
// stored at k * 200 + j, i.e. the image is indexed with the two axes swapped
// relative to the template. Squared differences are accumulated in a double
// and the sum divided by 200 * 50 is stored in err_YZ_gpu[i].
// Launch layout: 1D, one thread per template.
//
// Parameters:
//   template_roYZ_gpu     - templates, template i starts at offset i * 200 * 50
//   img2DBW_YZ_gpu        - reference image, 50 * 200 elements
//   rotationAngleYZ_size  - number of templates to evaluate
//   err_YZ_gpu            - output, one double per template
__global__ void kernel_6(float *template_roYZ_gpu, float *img2DBW_YZ_gpu, int rotationAngleYZ_size, double *err_YZ_gpu)
{
    const int angle = blockIdx.x * blockDim.x + threadIdx.x;
    if (angle >= rotationAngleYZ_size)
    {
        return;
    }

    const float *tmpl = template_roYZ_gpu + angle * 200 * 50;

    double squaredErrorSum = 0;
    for (int row = 0; row < 200; row++)
    {
        const float *tmplRow = tmpl + row * 50;
        for (int col = 0; col < 50; col++)
        {
            const float diff = tmplRow[col] - img2DBW_YZ_gpu[col * 200 + row];
            squaredErrorSum += diff * diff; // product formed in float, summed in double
        }
    }
    err_YZ_gpu[angle] = squaredErrorSum / (200 * 50);
}
// kernel_7: copies a 50 x 200 x 200 volume into a 200 x 200 x 50 layout,
// reversing two of the axes on the way.
//
// For every (i, j, k) with i < 200, j < 200, k < 50:
//     imageRotated3D_gpu_1[i * 200 * 50 + j * 50 + k] =
//         imageRotated3D_gpu[(49 - k) * 200 * 200 + (199 - j) * 200 + i]
//
// Launch layout: 3D; i, j, k come from the x, y, z dimensions respectively.
// Threads outside 200 x 200 x 50 do nothing.
//
// Parameters:
//   imageRotated3D_gpu   - source volume, 50 * 200 * 200 elements
//   imageRotated3D_gpu_1 - destination volume, 200 * 200 * 50 elements
__global__ void kernel_7(float *imageRotated3D_gpu, float *imageRotated3D_gpu_1)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int z = blockIdx.z * blockDim.z + threadIdx.z;
    if (x >= 200 || y >= 200 || z >= 50)
    {
        return;
    }

    const int srcOffset = (49 - z) * 200 * 200 + (199 - y) * 200 + x;
    const int dstOffset = x * 200 * 50 + y * 50 + z;
    imageRotated3D_gpu_1[dstOffset] = imageRotated3D_gpu[srcOffset];
}
// kernel_8: copies a 200 x 200 x 50 volume back into a 50 x 200 x 200 layout,
// reversing two of the axes on the way.
//
// For every (i, j, k) with i < 200, j < 200, k < 50:
//     imageRotated3D_gpu[(49 - k) * 200 * 200 + (199 - j) * 200 + i] =
//         imageRotated3D_gpu_2[i * 200 * 50 + j * 50 + k]
//
// Launch layout: 3D; i, j, k come from the x, y, z dimensions respectively.
// Threads outside 200 x 200 x 50 do nothing.
//
// Parameters:
//   imageRotated3D_gpu_2 - source volume, 200 * 200 * 50 elements
//   imageRotated3D_gpu   - destination volume, 50 * 200 * 200 elements
__global__ void kernel_8(float *imageRotated3D_gpu_2, float *imageRotated3D_gpu)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int z = blockIdx.z * blockDim.z + threadIdx.z;
    if (x >= 200 || y >= 200 || z >= 50)
    {
        return;
    }

    const int srcOffset = x * 200 * 50 + y * 50 + z;
    const int dstOffset = (49 - z) * 200 * 200 + (199 - y) * 200 + x;
    imageRotated3D_gpu[dstOffset] = imageRotated3D_gpu_2[srcOffset];
}
// kernel_9: thresholds a 200 * 200 * 50 element float volume into an int mask.
//
// For every index below 200 * 200 * 50, writes 1 to BWObjRecon_gpu when the
// matching element of imageRotated3D_gpu is strictly greater than
// imageRotated3D_x_mean, and 0 otherwise.
// Launch layout: 1D (only the x dimension is used).
//
// Parameters:
//   imageRotated3D_gpu     - input values, read at [0, 200 * 200 * 50)
//   imageRotated3D_x_mean  - threshold; the comparison is carried out in double
//   BWObjRecon_gpu         - output mask (int), written at [0, 200 * 200 * 50)
__global__ void kernel_9(float *imageRotated3D_gpu, double imageRotated3D_x_mean, int *BWObjRecon_gpu)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= 200 * 200 * 50)
    {
        return; // surplus thread at the tail of the grid
    }

    // The float sample is promoted to double for the comparison.
    const bool aboveThreshold = imageRotated3D_gpu[idx] > imageRotated3D_x_mean;
    BWObjRecon_gpu[idx] = aboveThreshold ? 1 : 0;
}
// kernel_10: copies an XObj x YObj x ZObj sub-volume out of a volume whose
// slices are 200 x 200, with the window positioned relative to a centroid.
//
// For every (x, y, z) with x < XObj, y < YObj, z < ZObj:
//     ObjReconRed_gpu[z * XObj * YObj + y * XObj + x] =
//         imageRotated3D_gpu[z * 200 * 200 + (CentroID0 - 61 + y) * 200
//                            + CentroID2 - 38 + x]
//
// Launch layout: 3D; x, y, z come from the x, y, z dimensions respectively.
//
// Parameters:
//   imageRotated3D_gpu - source volume, slice stride 200 * 200, row stride 200
//   ObjReconRed_gpu    - destination, XObj * YObj * ZObj elements
//   XObj, YObj, ZObj   - extents of the copied window
//   CentroID0          - row reference; the window starts 61 rows before it
//   CentroID2          - column reference; the window starts 38 columns before it
//
// NOTE(review): the source row/column are not range-checked here, so the
// window is presumably guaranteed by the caller to lie inside the
// 200 x 200 slice - confirm against the call site.
__global__ void kernel_10(float *imageRotated3D_gpu, float *ObjReconRed_gpu, int XObj, int YObj, int ZObj, int CentroID0, int CentroID2)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // XObj
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // YObj
    const int z = blockIdx.z * blockDim.z + threadIdx.z; // ZObj
    if (z >= ZObj || x >= XObj || y >= YObj)
    {
        return;
    }

    const int srcRow = CentroID0 - 61 + y;
    const int srcOffset = z * 200 * 200 + srcRow * 200 + CentroID2 - 38 + x;
    const int dstOffset = z * XObj * YObj + y * XObj + x;
    ObjReconRed_gpu[dstOffset] = imageRotated3D_gpu[srcOffset];
}
// kernel_11: copies an XObj x YObj x ZObj sub-volume out of a volume whose
// slices are 200 x 200, with the window starting at a given corner.
//
// For every (x, y, z) with x < XObj, y < YObj, z < ZObj:
//     ObjReconRed_gpu[z * XObj * YObj + y * XObj + x] =
//         imageRotated3D_gpu[z * 200 * 200 + (Corner0 + y) * 200 + Corner2 + x]
//
// Launch layout: 3D; x, y, z come from the x, y, z dimensions respectively.
//
// Parameters:
//   imageRotated3D_gpu - source volume, slice stride 200 * 200, row stride 200
//   ObjReconRed_gpu    - destination, XObj * YObj * ZObj elements
//   XObj, YObj, ZObj   - extents of the copied window
//   Corner0            - first source row of the window
//   Corner2            - first source column of the window
//
// NOTE(review): the source row/column are not range-checked here, so the
// window is presumably guaranteed by the caller to lie inside the
// 200 x 200 slice - confirm against the call site.
__global__ void kernel_11(float *imageRotated3D_gpu, float *ObjReconRed_gpu, int XObj, int YObj, int ZObj, int Corner0, int Corner2)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // XObj
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // YObj
    const int z = blockIdx.z * blockDim.z + threadIdx.z; // ZObj
    if (z >= ZObj || x >= XObj || y >= YObj)
    {
        return;
    }

    const int srcRow = Corner0 + y;
    const int srcOffset = z * 200 * 200 + srcRow * 200 + Corner2 + x;
    const int dstOffset = z * XObj * YObj + y * XObj + x;
    ObjReconRed_gpu[dstOffset] = imageRotated3D_gpu[srcOffset];
}