forked from Celebrandil/CudaSift
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cudaImage.cu
115 lines (104 loc) · 3.31 KB
/
cudaImage.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//********************************************************//
// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
//********************************************************//
#include <cstdio>
#include "cudautils.h"
#include "cudaImage.h"
// Divides a by b and rounds the quotient up by one whenever the division
// leaves a remainder. For non-negative a and positive b this is ceil(a/b).
// b must be non-zero; no check is performed.
int iDivUp(int a, int b)
{
  const int quotient = a / b;
  const int remainder = a % b;
  if (remainder == 0)
    return quotient;
  return quotient + 1;
}
// Plain integer division of a by b (the quotient is truncated by the
// language's integer division). b must be non-zero; no check is performed.
int iDivDown(int a, int b)
{
  const int quotient = a / b;
  return quotient;
}
// Returns a unchanged when it is already a multiple of b; otherwise strips
// the remainder and adds one full step of b. For non-negative a and positive
// b this is the smallest multiple of b that is >= a.
// b must be non-zero; no check is performed.
int iAlignUp(int a, int b)
{
  const int remainder = a % b;
  if (remainder == 0)
    return a;
  return a - remainder + b;
}
// Removes the remainder of a / b from a. For non-negative a and positive b
// this is the largest multiple of b that is <= a.
// b must be non-zero; no check is performed.
int iAlignDown(int a, int b)
{
  const int remainder = a % b;
  return a - remainder;
}
// Records the image geometry and binds the device/host buffers, allocating
// whichever of them the caller did not supply.
//   w, h     image width and height, in float elements
//   p        row pitch in float elements; kept as-is when devmem is given,
//            otherwise replaced by the pitch chosen by cudaMallocPitch
//   host     when true and hostmem is NULL, a host buffer of pitch*height
//            floats is malloc'ed
//   devmem   caller-supplied device buffer, or NULL to allocate one here
//   hostmem  caller-supplied host buffer, or NULL
// Buffers allocated here are flagged through d_internalAlloc/h_internalAlloc;
// the flags are left untouched for caller-supplied buffers. Any existing
// t_data pointer is reset to NULL without being released.
void CudaImage::Allocate(int w, int h, int p, bool host, float *devmem, float *hostmem)
{
  width = w;
  height = h;
  pitch = p;
  d_data = devmem;
  h_data = hostmem;
  t_data = NULL;
  if (devmem==NULL) {
    // cudaMallocPitch stores a full size_t through its pitch argument, so
    // receive it in a genuine size_t rather than aliasing the pitch member
    // through a pointer cast (which would write past a narrower member).
    size_t pitchBytes = 0;
    safeCall(cudaMallocPitch((void **)&d_data, &pitchBytes, (size_t)(sizeof(float)*width), (size_t)height));
    // The runtime reports the pitch in bytes; the class keeps it in floats.
    pitch = (int)(pitchBytes/sizeof(float));
    if (d_data==NULL)
      printf("Failed to allocate device data\n");
    d_internalAlloc = true;
  }
  if (host && hostmem==NULL) {
    // Sized with the (possibly padded) pitch, not the width.
    h_data = (float *)malloc(sizeof(float)*pitch*height);
    if (h_data==NULL)
      printf("Failed to allocate host data\n");
    h_internalAlloc = true;
  }
}
// Creates an empty image: zero dimensions, no device/host/texture buffers,
// and nothing marked as internally allocated. pitch is zeroed as well so
// that members reading it before Allocate() has run see a defined value.
CudaImage::CudaImage() :
  width(0), height(0), pitch(0), d_data(NULL), h_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false)
{
}
// Releases the buffers this object allocated itself and clears all three
// data pointers. Device and host buffers are freed only when their
// *_internalAlloc flag is set; a texture array is freed whenever present.
CudaImage::~CudaImage()
{
  // Device buffer: freed only if it was allocated internally.
  if (d_data!=NULL && d_internalAlloc) {
    safeCall(cudaFree(d_data));
  }
  d_data = NULL;

  // Host buffer: freed only if it was allocated internally.
  if (h_data!=NULL && h_internalAlloc) {
    free(h_data);
  }
  h_data = NULL;

  // Texture array: freed whenever one exists.
  if (t_data!=NULL) {
    safeCall(cudaFreeArray((cudaArray *)t_data));
  }
  t_data = NULL;
}
// Copies the image from h_data to d_data (host -> device) with a 2D copy.
// Host rows are read as tightly packed (width floats per row); device rows
// are written with the stored pitch. The copy is skipped when either buffer
// is NULL. Returns the value read from the TimerGPU started on entry, which
// is printed as milliseconds when VERBOSE is defined.
double CudaImage::Download()
{
  TimerGPU timer(0);
  const int devicePitchBytes = sizeof(float)*pitch;
  const bool haveBothBuffers = (d_data!=NULL) && (h_data!=NULL);
  if (haveBothBuffers) {
    safeCall(cudaMemcpy2D(d_data, devicePitchBytes, h_data, sizeof(float)*width, sizeof(float)*width, height, cudaMemcpyHostToDevice));
  }
  const double elapsed = timer.read();
#ifdef VERBOSE
  printf("Download time = %.2f ms\n", elapsed);
#endif
  return elapsed;
}
// Copies the image from d_data to h_data (device -> host) with a 2D copy.
// Device rows are read with the stored pitch; host rows are written tightly
// packed (width floats per row). Unlike Download(), the buffers are not
// checked for NULL before the copy is issued. Returns the value read from
// the TimerGPU started on entry, printed as milliseconds under VERBOSE.
double CudaImage::Readback()
{
  TimerGPU timer(0);
  const int devicePitchBytes = sizeof(float)*pitch;
  safeCall(cudaMemcpy2D(h_data, sizeof(float)*width, d_data, devicePitchBytes, sizeof(float)*width, height, cudaMemcpyDeviceToHost));
  const double elapsed = timer.read();
#ifdef VERBOSE
  printf("Readback time = %.2f ms\n", elapsed);
#endif
  return elapsed;
}
double CudaImage::InitTexture()
{
TimerGPU timer(0);
cudaChannelFormatDesc t_desc = cudaCreateChannelDesc<float>();
safeCall(cudaMallocArray((cudaArray **)&t_data, &t_desc, pitch, height));
if (t_data==NULL)
printf("Failed to allocated texture data\n");
double gpuTime = timer.read();
#ifdef VERBOSE
printf("InitTexture time = %.2f ms\n", gpuTime);
#endif
return gpuTime;
}
// Copies this image's pixels into the texture array owned by dst.
//   dst   image whose t_data array receives the data; must be non-NULL
//   host  true  -> copy from this image's h_data (host -> device)
//         false -> copy from this image's d_data (device -> device)
// Prints an error and returns 0.0 when dst has no texture array or the
// selected source buffer is NULL. Otherwise performs the copy, synchronizes
// the device, and returns the value read from the TimerGPU (printed as
// milliseconds under VERBOSE).
// NOTE(review): the byte count combines this image's pitch with dst's
// height, and cudaMemcpyToArray is marked deprecated in recent CUDA
// toolkits - confirm both are intended before touching this.
double CudaImage::CopyToTexture(CudaImage &dst, bool host)
{
  if (dst.t_data==NULL) {
    printf("Error CopyToTexture: No texture data\n");
    return 0.0;
  }
  // Pick the source buffer that matches the requested direction.
  const float *source = host ? h_data : d_data;
  if (source==NULL) {
    printf("Error CopyToTexture: No source data\n");
    return 0.0;
  }
  TimerGPU timer(0);
  const cudaMemcpyKind direction = host ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice;
  safeCall(cudaMemcpyToArray((cudaArray *)dst.t_data, 0, 0, source, sizeof(float)*pitch*dst.height, direction));
  safeCall(cudaDeviceSynchronize());
  const double elapsed = timer.read();
#ifdef VERBOSE
  printf("CopyToTexture time = %.2f ms\n", elapsed);
#endif
  return elapsed;
}