-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHoughCUDA.c
627 lines (544 loc) · 23.1 KB
/
HoughCUDA.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/*
* Performs a Hough transform on the images given as
* parameters using CUDA. To keep things simple this
* file includes copy-pasted functions for
* handling PNG files.
*/
#include <getopt.h>
#include <unistd.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <cuda.h>
#ifndef NO_LIBPNG
#include <png.h>
#endif /* NO_LIBPNG */
// Write constant message to the console.
// f is the file descriptor handed to write() and s must be a string
// literal: it is pasted together with "\n", and sizeof(s) (which counts
// the terminating NUL of s) equals the length of s plus that newline,
// so exactly the message and its line break are written.
#define msg(f,s) (void)write(f,s "\n",sizeof(s))
// Write constant error message to the console.
// Prefixes the string literal s with "ERROR: " and sends it to
// STDERR_FILENO through msg.
#define err(s) msg(STDERR_FILENO,"ERROR: " s)
// Write error message and the name of a CUDA error code to the console.
// s must be a string literal (it goes through err) and e is a CUresult.
// cuGetErrorName leaves the string pointer NULL when it does not
// recognise the code, so a fixed fallback text is printed in that case
// instead of handing NULL to strlen.
// Wrapped in do{}while(0) so the macro behaves as a single statement
// in every if/else position. Always terminate the call with a semicolon.
#define errcuda(s,e) do{\
	err(s); \
	const char *errcuda_name_=0; \
	if(cuGetErrorName((e),&errcuda_name_)!=CUDA_SUCCESS||!errcuda_name_) errcuda_name_="unknown CUDA error"; \
	struct iovec errcuda_vecs_[2]={{.iov_base=(char*)errcuda_name_,.iov_len=strlen(errcuda_name_)},{.iov_base="\n",.iov_len=1}}; \
	(void)writev(STDERR_FILENO,errcuda_vecs_,2); \
}while(0)
// Simple structure that holds information about a
// PNG image. It doesn't store the image data.
//
// Members:
// width is the number of columns.
// height is the number of rows.
// special is a weird one. It was meant to
// store the result of png_get_valid,
// which answers for example whether
// the image is sRGB or not. The only
// assignment in pngRead is commented
// out, so nothing in this file gives
// it a meaningful value.
// allocationsize is the size in bytes of the
// buffer holding the pixels.
// colortype is RGB, gray or something else
// (a PNG_COLOR_TYPE_* value).
// bitdepth is the number of bits per
// channel.
// interlace, compression and filter are the
// remaining IHDR fields; pngRead fills
// them with png_get_IHDR and
// pngWriteHost passes them on to
// png_set_IHDR.
// rowsize is the number of bytes per row as
// reported by png_get_rowbytes. It is
// used as the stride between rows.
typedef struct{
png_uint_32 width;
png_uint_32 height;
uint32_t special;
uint32_t allocationsize;
int colortype;
int bitdepth;
int interlace;
int compression;
int filter;
int rowsize;
}PngInfo;
// Reads a PNG image from fp into one contiguous CUDA device
// allocation (rows back to back, no row pointers).
//
// Parameters:
// png is filled with the IHDR fields, the row stride
// (rowsize) and the total byte count (allocationsize).
// gpumemory receives the device allocation. It is only
// valid when the function returns 1 and the
// caller has to release it with cuMemFree.
// fp is a stream opened for binary reading and
// positioned at the start of the file.
//
// Returns 1 on success and 0 on any failure (not a PNG,
// libpng error, image too large, allocation or copy failure).
// On failure nothing stays allocated.
int pngRead(PngInfo *png,CUdeviceptr *gpumemory,FILE *fp){
#ifdef NO_LIBPNG
	// Since we don't have libpng we have to improvise.
	// Nothing is implemented yet so report failure.
	(void)png;
	(void)gpumemory;
	(void)fp;
	return 0;
#else
	unsigned char header[8];
	png_structp pngstruct;
	png_infop pnginfo;
	// These two are changed after setjmp and read again in the
	// longjmp handler, so they have to be volatile.
	uint8_t *volatile memory=0;
	volatile int gpuallocated=0;
	size_t rowbytes;
	int passes;
	int result=0;

	// Check the header is correct. A file too short to hold the
	// signature can't be a PNG either.
	if(fread(header,1,sizeof(header),fp)!=sizeof(header)) return 0;
	if(png_sig_cmp(header,0,sizeof(header))) return 0;

	pngstruct=png_create_read_struct(PNG_LIBPNG_VER_STRING,0,0,0);
	if(!pngstruct) return 0;
	pnginfo=png_create_info_struct(pngstruct);
	if(!pnginfo){
		// Only free the png_struct since png_info couldn't be created.
		png_destroy_read_struct(&pngstruct,0,0);
		return 0;
	}

	// libpng reports errors by long jumping here. One handler
	// releases whatever has been allocated up to that point.
	if(setjmp(png_jmpbuf(pngstruct))){
		err("With in the libpng!");
		free(memory);
		if(gpuallocated) cuMemFree(*gpumemory);
		// This command destroys both pngstruct and pnginfo.
		png_destroy_read_struct(&pngstruct,&pnginfo,0);
		return 0;
	}

	// Default stdio based reading.
	png_init_io(pngstruct,fp);
	// The 8 signature bytes have already been consumed above.
	png_set_sig_bytes(pngstruct,8);
	// TODO: How to handle paletted images since bit per channel isn't constant.
	png_read_info(pngstruct,pnginfo);
	png_get_IHDR(pngstruct,pnginfo,&png->width,&png->height,&png->bitdepth,&png->colortype,&png->interlace,&png->compression,&png->filter);
	// png_get_valid isn't queried, give the member a defined value.
	png->special=0;
	// Let libpng deinterlace: 7 passes for Adam7 images, otherwise 1.
	passes=png_set_interlace_handling(pngstruct);

	// png_get_rowbytes already accounts for channel count and bit
	// depth, so the image needs exactly rowbytes*height bytes.
	rowbytes=png_get_rowbytes(pngstruct,pnginfo);
	// rowsize is an int and allocationsize is 32 bit wide.
	if(rowbytes>(size_t)INT_MAX||(uint64_t)rowbytes*png->height>UINT32_MAX){
		err("pngRead | image too large!");
		goto cleanup;
	}
	png->rowsize=(int)rowbytes;
	png->allocationsize=(uint32_t)(rowbytes*png->height);

	// Allocate memory for the image.
	if(cuMemAlloc(gpumemory,png->allocationsize)!=CUDA_SUCCESS){
		err("pngRead | cuMemAlloc!");
		goto cleanup;
	}
	gpuallocated=1;
	memory=malloc(png->allocationsize);
	if(!memory){
		err("pngRead | malloc!");
		goto cleanup;
	}

	// Reads rows to one memory area. Every pass has to see the same
	// row buffers so that libpng can merge the passes.
	for(int pass=0;pass<passes;pass++){
		for(png_uint_32 row=0;row<png->height;row++) png_read_row(pngstruct,memory+(size_t)row*rowbytes,0);
	}

	// Move to device.
	if(cuMemcpyHtoD(*gpumemory,memory,png->allocationsize)==CUDA_SUCCESS) result=1;
	else err("pngRead | cuMemcpyHtoD!");

cleanup:
	free(memory);
	if(!result&&gpuallocated) cuMemFree(*gpumemory);
	png_destroy_read_struct(&pngstruct,&pnginfo,0);
	return result;
#endif /* NO_LIBPNG */
}
// Write host memory image to PNG file.
int pngWriteHost(const PngInfo *info,const png_bytep memory,FILE *fp){
png_structp pngstruct=png_create_write_struct(PNG_LIBPNG_VER_STRING,0,0,0);
if(!pngstruct) return 0;
png_infop pnginfo=png_create_info_struct(pngstruct);
if(!pnginfo){
png_destroy_write_struct(&pngstruct,0);
return 0;
}
// Setup longjump for libpng to return to
// if it encounters an error.
if(setjmp(png_jmpbuf(pngstruct))){
err("With in the libpng!");
png_destroy_write_struct(&pngstruct,&pnginfo);
return 0;
}
// Default underline behavior.
png_init_io(pngstruct,fp);
// Write header.
// Parameters are:
// Width,height,bit depth,color type,interlace type,compression type,filter type
png_set_IHDR(pngstruct,pnginfo,info->width,info->height,info->bitdepth,info->colortype,info->interlace,info->compression,info->filter);
png_write_info(pngstruct,pnginfo);
for(uint32_t i=0;i<info->height;i++) png_write_row(pngstruct,memory+i*info->rowsize);
png_write_end(pngstruct,pnginfo);
// Free Libpng write resources.
png_free_data(pngstruct,pnginfo,PNG_FREE_ALL,-1);
png_destroy_info_struct(pngstruct,&pnginfo);
png_destroy_write_struct(&pngstruct,0);
return 1;
}
// Write CUDA device memory to PNG file.
int pngWrite(const PngInfo *info,const CUdeviceptr gpumemory,FILE *fp){
#ifdef NO_LIBPNG
// No libpng available has to improvise.
#else
// Allocate temporary memory region of memory.
png_bytep memory=malloc(sizeof(png_byte)*info->allocationsize);
if(memory){
// Move the gpumemory
cuMemcpyDtoH(memory,gpumemory,sizeof(png_byte)*info->allocationsize);
int result=pngWriteHost(info,memory,fp);
free(memory);
return result;
}
else err("pngWrite | malloc!");
return 0;
#endif /* NO_LIBPNG */
}
// Main entry to the program.
// Does initialiation and GPU ordering.
//
// Commandline arguments:
// Non-options are images to be hough
// tranformed. For now have to PNGs.
// -g index selecs the GPU of given index.
// -e threshold selects threshold used in
// edge detection.
//
int main(int argn,char **args){
// Do get more knowledgable abouts errors have general
// CUResult capture.
CUresult ecode;
// Debugging flags.
struct{
bool gray:1;
bool edge:1;
}debugflags={false,false};
// Initialization function for CUDA.
// Flag is zero since it has to be!
if((ecode=cuInit(0))==CUDA_SUCCESS){
// Which GPU to use. Defaults to first one.
int selectedgpu=0;
// Númber of GPU in the system.
int numberofgpus=0;
// Edge detection threshold
float edgethreshold=0.2;
if((ecode=cuDeviceGetCount(&numberofgpus))==CUDA_SUCCESS){
// Handle Arguments with getopt.
// For simplicity don't use long options.
{
int c;
while((c=getopt(argn,args,"hg:e:d:"))!=-1){
switch(c){
case 'h':
msg(STDOUT_FILENO,"Usage: hough [options] <PNG file>\n\n"
"Options:\n"
"\t-g <integer> select GPU.\n"
"\t-e <float> give edge threshold (between 0 and 1).\n"
"\t-d <value> debug output.Possible values:\n"
"\t\tgray\twrite colour to gray conversion image.\n"
"\t\tedge\twrite edge detection image."
);
break;
case 'g':
selectedgpu=atoi(optarg);
break;
case 'e':
edgethreshold=atof(optarg);
break;
case 'd':
if(strcmp(optarg,"gray")==0){
debugflags.gray=true;
}
else if(strcmp(optarg,"edge")==0){
debugflags.edge=true;
}
else{
err("Unknown value for debugging");
return 0;
}
}
}
}
// Get handler for GPU to be used
// for this hought transform. Make
// sure that selectedgpu is less
// the numberofgpus.
if(selectedgpu<numberofgpus){
CUdevice gpu;
if((ecode=cuDeviceGet(&gpu,selectedgpu))==CUDA_SUCCESS){
// Maximum number threads.
int maxthreads;
cuDeviceGetAttribute(&maxthreads,CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,gpu);
// Maximum number of threads in x dimension.
int maxblockdimx;
cuDeviceGetAttribute(&maxblockdimx,CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,gpu);
// Maximum number of threads in y dimension.
int maxblockdimy;
cuDeviceGetAttribute(&maxblockdimy,CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,gpu);
// Maximum number of blocks in x dimemsion.
int maxgriddimx;
cuDeviceGetAttribute(&maxgriddimx,CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,gpu);
// Maximum number of blocks in y dimemsion.
int maxgriddimy;
cuDeviceGetAttribute(&maxgriddimy,CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,gpu);
// Maximum amount of shared memory per block in bytes.
int maxshared;
cuDeviceGetAttribute(&maxshared,CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,gpu);
// Amount of threads in warp (number of threads executed simultaneously).
int warpsize;
cuDeviceGetAttribute(&warpsize,CU_DEVICE_ATTRIBUTE_WARP_SIZE,gpu);
// Number of processor
int gpuprocessors;
cuDeviceGetAttribute(&gpuprocessors,CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,gpu);
// Create context for GPU cpu interaction.
CUcontext context;
if(cuCtxCreate(&context,CU_CTX_SCHED_SPIN,gpu)==CUDA_SUCCESS){
// Load the "shared library" which has GPU code and
// "get address" to functions.
CUmodule libhough;
if((ecode=cuModuleLoad(&libhough,"libhough.cubin"))==CUDA_SUCCESS){
CUfunction rgbtograykernel;
if((ecode=cuModuleGetFunction(&rgbtograykernel,libhough,"rgbToGray"))==CUDA_SUCCESS){
CUfunction sobelkernel;
if((ecode=cuModuleGetFunction(&sobelkernel,libhough,"sobel"))==CUDA_SUCCESS){
CUfunction houghlinekernel;
if((ecode=cuModuleGetFunction(&houghlinekernel,libhough,"houghLine"))==CUDA_SUCCESS){
CUfunction renderlineskernel;
if((ecode=cuModuleGetFunction(&renderlineskernel,libhough,"renderLinesPolar"))==CUDA_SUCCESS){
// LOAD THE IMAGE LOOP
// Loop for over every PNG image.
// Loop goes through all the
for(char *file=args[optind];optind<argn;file=args[++optind]){
FILE *readfd=fopen(file,"rb");
if(readfd){
PngInfo pnginfo;
CUdeviceptr image;
if(pngRead(&pnginfo,&image,readfd)){
// CALCULATE GRID AND BLOCK SIZE
// TODO: What happens if image is too small.
// TODO: What if image is so big there
// need to have loop inside of the GPU code?
unsigned int blockx=warpsize;
unsigned int blocky=warpsize;
unsigned int gridx=pnginfo.width/blockx+(pnginfo.width/blockx>0);
unsigned int gridy=pnginfo.height/blocky+(pnginfo.height/blocky>0);
// Change color images to gray scale and continue with gray scale images.
CUdeviceptr grayimage;
switch(pnginfo.colortype){
case PNG_COLOR_TYPE_RGB:
if((ecode=cuMemAlloc(&grayimage,pnginfo.width*pnginfo.height))==CUDA_SUCCESS){
// Execute RGB to Gray kernel to get gray.
void *args[]={&image,&grayimage,&pnginfo.width,&pnginfo.height,0};
if((ecode=cuLaunchKernel(rgbtograykernel,gridx,gridy,1,blockx,blocky,1,0,0,args,0))!=CUDA_SUCCESS){
errcuda("cuLaunchKernel | grayimage!",ecode);
goto jmp_SAFE_EXIT_GRAYIMAGE;
}
// Do user want to write gray image as debug.
if(debugflags.gray){
FILE *fp=fopen("/tmp/gray.png","wb");
PngInfo debugpnginfo;
debugpnginfo.width=pnginfo.width;
debugpnginfo.height=pnginfo.height;
debugpnginfo.special=pnginfo.special;
debugpnginfo.allocationsize=pnginfo.width*pnginfo.height;
debugpnginfo.colortype=PNG_COLOR_TYPE_GRAY;
debugpnginfo.bitdepth=8;
debugpnginfo.interlace=0;
debugpnginfo.compression=0;
debugpnginfo.filter=0;
debugpnginfo.rowsize=pnginfo.width;
pngWrite(&debugpnginfo,grayimage,fp);
fclose(fp);
}
}
else{
errcuda("cuMemAlloc | grayimage!",ecode);
goto jmp_SAFE_EXIT_GRAYIMAGE;
}
break;
case PNG_COLOR_TYPE_GRAY:
grayimage=image;
break;
default:
fprintf(stderr,"\nSkipping the file \"%s\" since it has unimplemented colortype!\n",file);
cuMemFree(image);
continue;
}
// Run Egde detection. Output will be list of indexes (y*width+x)
// and number memory allocated for that list is used.
CUdeviceptr binedge;
if((ecode=cuMemAlloc(&binedge,pnginfo.width*pnginfo.height*sizeof(uint8_t)))==CUDA_SUCCESS){
{
void *args[]={&grayimage,&binedge,&pnginfo.width,&pnginfo.height,&edgethreshold,0};
ecode=cuLaunchKernel(sobelkernel,gridx,gridy,1,blockx,blocky,1,0,0,args,0);
}
if(ecode==CUDA_SUCCESS){
// Make edge list from grayimage manually on CPU since we have to
// count up which isn't easy to do.
// TODO: Design how to do this on GPU at edge detector!
uint32_t *deviceedgelist=malloc(sizeof(uint32_t)*pnginfo.width*pnginfo.height);
if(deviceedgelist){
uint32_t edgelistcount=0;
uint8_t *edgeimagedevice=malloc(sizeof(uint8_t)*pnginfo.width*pnginfo.height);
if(edgeimagedevice){
cuMemcpyDtoH(edgeimagedevice,binedge,sizeof(uint8_t)*pnginfo.width*pnginfo.height);
// Don't need edgeimage anymore.
cuMemFree(binedge);
// If debugging the edge image is enabled write the edge image.
if(debugflags.edge){
FILE *fp=fopen("/tmp/edge.png","wb");
PngInfo debugpnginfo;
debugpnginfo.width=pnginfo.width;
debugpnginfo.height=pnginfo.height;
debugpnginfo.special=pnginfo.special;
debugpnginfo.allocationsize=0;
debugpnginfo.colortype=PNG_COLOR_TYPE_GRAY;
debugpnginfo.bitdepth=8;
debugpnginfo.interlace=0;
debugpnginfo.compression=0;
debugpnginfo.filter=0;
debugpnginfo.rowsize=pnginfo.width;
pngWriteHost(&debugpnginfo,(png_bytep)edgeimagedevice,fp);
fclose(fp);
}
for(uint32_t x=0;x<pnginfo.width;x++){
for(uint32_t y=0;y<pnginfo.height;y++){
if(edgeimagedevice[y*pnginfo.width+x]>0) deviceedgelist[edgelistcount++]=(x<<16)+y;
}
}
// If we don't have enough edge pixels, then
// there isn't much of point continuing.
// Warpsize used as counter so that later
// when calculating kernel grid sizes isn't
// zero.
// TODO: Better grid size calculation on hough transform.
if(edgelistcount>warpsize*2){
CUdeviceptr edgelist;
if((ecode=cuMemAlloc(&edgelist,sizeof(uint32_t)*edgelistcount))==CUDA_SUCCESS){
free(deviceedgelist);
free(edgeimagedevice);
// Run the hough transform.
// Calculate angle and distance difference used in accumulator.
// Angle should be between 0 and <PI>/2 and radius should be
// between zero and image diagonal (sqrt(width^2+height^2)).
// Calculate number of ticks based upon this number which
// are floored because extreame values probably don't exist.
float maxradius=sqrt(pnginfo.width*pnginfo.width+pnginfo.height*pnginfo.height);
float angled=0.001;
float radiusd=2;
uint16_t angleticks=(uint16_t)floor((M_PI/2)/angled);
uint16_t radiusticks=(uint16_t)floor(maxradius/radiusd);
// Allocate accumator based upon number ticks we have.
// Also memset to zero so that we have clean memory.
CUdeviceptr accumulator;
if(cuMemAlloc(&accumulator,sizeof(uint32_t)*angleticks*radiusticks)==CUDA_SUCCESS){
cuMemsetD32(accumulator,0,angleticks*radiusticks);
// Run thread per edge index.
// Use only dimension as input is index list.
{
void *args[]={&edgelist,&edgelistcount,&accumulator,&angled,&radiusd,&angleticks,&radiusticks,0};
ecode=cuLaunchKernel(houghlinekernel,(edgelistcount/warpsize)+(edgelistcount%warpsize>0),1,1,warpsize,1,1,0,0,args,0);
}
if(ecode==CUDA_SUCCESS){
// Collect information for render the lines.
uint32_t *accumulatorhost=malloc(sizeof(uint32_t)*angleticks*radiusticks);
if(accumulatorhost){
cuMemcpyDtoH(accumulatorhost,accumulator,sizeof(uint32_t)*angleticks*radiusticks);
uint32_t *hostlineparameters=malloc(sizeof(uint32_t)*edgelistcount);
if(hostlineparameters){
uint32_t peakthreas=2800;
uint32_t peakcount=0;
for(uint16_t angle=0;angle<angleticks;angle++){
for(uint16_t radius=0;radius<radiusticks;radius++){
if(accumulatorhost[radius*angleticks+angle]>peakthreas){
hostlineparameters[peakcount++]=(angle<<16)+radius;
}
}
}
// Allocate memory for making lines images.
CUdeviceptr lineparameters;
if(peakcount>0){
if((ecode=cuMemAlloc(&lineparameters,sizeof(uint32_t)*peakcount))==CUDA_SUCCESS){
cuMemcpyHtoD(lineparameters,hostlineparameters,sizeof(uint32_t)*peakcount);
CUdeviceptr finalimage;
if((ecode=cuMemAlloc(&finalimage,pnginfo.width*pnginfo.height))==CUDA_SUCCESS){
cuMemsetD8(finalimage,0,pnginfo.width*pnginfo.height);
// Give rendering information.
{
void *args[]={&lineparameters,&peakcount,&finalimage,&pnginfo.width,&pnginfo.height,&angled,&radiusd};
ecode=cuLaunchKernel(renderlineskernel,peakcount/warpsize+(peakcount%warpsize>0),1,1,warpsize,1,1,0,0,args,0);
}
if(ecode==CUDA_SUCCESS){
// Make sure image to be written out is gray image with 8 bit channel.
pnginfo.allocationsize=pnginfo.width*pnginfo.height;
pnginfo.rowsize=pnginfo.width;
pnginfo.colortype=PNG_COLOR_TYPE_GRAY;
pnginfo.bitdepth=8;
FILE *wfd=fopen("test.png","wb");
pngWrite(&pnginfo,accumulator,wfd);
fclose(wfd);
}
else errcuda("cuLaunchKernel | renderLines!",ecode);
cuMemFree(finalimage);
}
else errcuda("cuMemAlloc | finalimage!",ecode);
cuMemFree(lineparameters);
}
else errcuda("cuMemAlloc | lineparameters!",ecode);
free(hostlineparameters);
}
else msg(STDOUT_FILENO,"No edges found?");
}
else err("malloc failled!");
free(accumulatorhost);
}
else err("malloc failled!");
}
else errcuda("cuLaunchKernel | hough!",ecode);
}
}
else errcuda("cuMemAlloc | edge allocation!",ecode);
}
else{
free(deviceedgelist);
free(edgeimagedevice);
err("No edge pixel found!");
}
}
else{
free(deviceedgelist);
err("malloc failled!");
}
}
else err("malloc failled!");
}
else{
cuMemFree(binedge);
errcuda("cuLaunchKernel | sobelkernel!",ecode);
}
}
else errcuda("cuMemAlloc | binedge!",ecode);
// Program jumps here if rgbToGray
// errors for some reason.
jmp_SAFE_EXIT_GRAYIMAGE:
// If image was already gray we don't
// need to free gray version of it.
if(grayimage!=image) cuMemFree(grayimage);
cuMemFree(image);
}
else fprintf(stderr,"\nPNG read error happened to \"%s\".\nProgram continues despite this!\n",file);
fclose(readfd);
}
else fprintf(stderr,"\nPNG read error happened to \"%s\"!\n Program continues despite this!\n",file);
}
}
else errcuda("Kernel | renderLines!",ecode);
}
else errcuda("Kernel | houghline!",ecode);
}
else errcuda("Kernel | sobel!",ecode);
}
else errcuda("Kernel | rgbToGray!",ecode);
// Unload the module
cuModuleUnload(libhough);
}
else errcuda("cuModuleLoad | libhough!",ecode);
// Since loop is behind us just destroy the GPU context.
cuCtxDestroy(context);
}
else errcuda("cuCtxCreate!",ecode);
}
else errcuda("cuDeviceGet!",ecode);
}
else err("selectedgpu is more of equal to number GPUs!");
}
else errcuda("cuDeviceGetCount!",ecode);
}
else errcuda("cuInit!",ecode);
return 0;
}