Skip to content

Commit

Permalink
gabor code cleaned
Browse files Browse the repository at this point in the history
  • Loading branch information
friskluft committed Aug 28, 2024
1 parent f4ab91c commit f7b7fe8
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 135 deletions.
119 changes: 32 additions & 87 deletions src/nyx/features/gabor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,81 +272,42 @@ void GaborFeature::calculate_gpu_multi_filter (LR& r, size_t roiidx)

// Examine the baseline signal

#ifdef GAMLEDAGE
unsigned long baselineScore = 0; // abundance of baseline signal pixels over its average
double maxval = 0.0; // max value of the baseline signal

std::vector<PixIntens>& pix_plane = responses[0];

ImageMatrix e2img;
e2img.allocate(Im0.width, Im0.height);
PixIntens* e2img_ptr = e2img.writable_data_ptr();

for (int k = 0; k < Im0.height * Im0.width; ++k)
e2img_ptr[k] = pix_plane[k];

// Values that we need for scoring filter responses
Moments2 local_stats;
e2img.GetStats(local_stats);
maxval = local_stats.max__();
double cmpval = local_stats.min__();

// Score the baseline signal
for (auto a : pix_plane)
if (double(a) > cmpval)
baselineScore++;
#else
// we need to get these 3 values from response[0], the baseline signal
PixIntens maxval = 0,
cmpval = UINT16_MAX; // min
size_t baselineScore = 0;

size_t roiLen = Im0.width * Im0.height;
Moments2 stats;
for (size_t i = 0; i < roiLen; i++)
size_t wh = Im0.width * Im0.height;
for (size_t i = 0; i < wh; i++)
{
PixIntens p = NyxusGpu::gabor_energy_image.hobuffer[i];
stats.add(p);
auto a = responses[0][i];
maxval = std::max(a, maxval);
cmpval = std::min(a, cmpval);
}
double maxval = stats.max__(),
cmpval = stats.min__();
size_t baselineScore = 0;
for (size_t i = 0; i < roiLen; i++)

for (size_t i = 0; i < wh; i++)
{
PixIntens p = NyxusGpu::gabor_energy_image.hobuffer[i];
if (p > cmpval)
auto a = responses[0][i];
if (a > cmpval)
baselineScore++;
}
#endif

#ifdef GAMLEDAGE
// Iterate high-pass filter response signals and score them
for (auto j=1; j< freqs.size(); j++)
{
std::vector<PixIntens>& responsePixplane = responses [j];

// score it
unsigned long afterGaborScore = 0;
for (PixIntens a : responsePixplane)
if (double(a) / maxval > GRAYthr)
afterGaborScore++;

// save the score as feature value
fvals[j-1] = (double)afterGaborScore / (double)baselineScore;
}
#else

for (auto k = 1; k < freqs.size(); k++)
{
size_t offs = k * roiLen;
size_t offs = k * wh;
size_t afterGaborScore = 0;
// score this filter response
for (size_t i = 0; i < roiLen; i++)
for (size_t i = 0; i < wh; i++)
{
PixIntens p = NyxusGpu::gabor_energy_image.hobuffer [offs + i];
if (double(p) / maxval > GRAYthr)
double a = responses[k][i];
if (a / maxval > GRAYthr)
afterGaborScore++;
}
// save the score as feature value
fvals[k - 1] = (double)afterGaborScore / (double)baselineScore;
}
#endif

}
#endif

Expand Down Expand Up @@ -607,10 +568,10 @@ void GaborFeature::GaborEnergyGPUMultiFilter (
int kerside,
int num_filters)
{
//xxxxxxx int n_gab = n;

readOnlyPixels pix_plane = Im.ReadablePixels();

// result in NyxusGpu::gabor_energy_image.hobuffer
// of length [ num_filters * Im.width * Im.height ]
bool success = CuGabor::conv_dud_gpu_fft_multi_filter (
auxC,
pix_plane.data(),
Expand All @@ -621,38 +582,22 @@ void GaborFeature::GaborEnergyGPUMultiFilter (
kerside,
num_filters,
GaborFeature::dev_filterbank);
if (!success)
std::cerr << "\n\n\nUnable to calculate Gabor features on GPU \n\n\n";

#ifdef GAMLEDAGE
for (int idx, i = 0; i < num_filters; ++i)
if (!success)
{
idx = 2 * i * (Im.width + kerside - 1) * (Im.height + kerside - 1);
decltype(Im.height) b = 0;

for (auto y = (int)ceil((double)kerside / 2); b < Im.height; y++)
{
decltype(Im.width) a = 0;

for (auto x = (int)ceil((double)kerside / 2); a < Im.width; x++)
{
size_t oi = (y * 2 * (Im.width + kerside - 1) + x * 2); // offset within the image, complex layout
if (std::isnan(auxC[idx + oi]) || std::isnan(auxC[idx + oi+1]))
{
out[i][(b * Im.width + a)] = (PixIntens) std::numeric_limits<double>::quiet_NaN();
a++;
continue;
}
out[i][(b * Im.width + a)] = (PixIntens) sqrt(pow(auxC[idx + oi], 2) + pow(auxC[idx + oi+1], 2));
a++;
}
b++;
}
std::cerr << "\n\n\nUnable to calculate Gabor features on GPU \n\n\n";
return;
}
#else

// GPU: 'NyxusGpu::gabor_energy_image.hobuffer' already has the above out[i][.] result

#endif
// Convert it to a 'paged' layout
size_t wh = Im.width * Im.height;
for (int f = 0; f < num_filters; f++)
{
size_t skip = f * wh;
for (size_t i = 0; i < wh; i++)
out[f][i] = NyxusGpu::gabor_energy_image.hobuffer [skip + i];
}

}
#endif
Expand Down
124 changes: 76 additions & 48 deletions src/nyx/gpu/gabor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,50 +285,58 @@ namespace CuGabor {
return true;
}

// layouts are different! Complex column-major, energy row-major
// Calculates padded convolution results montage into non-padded energy montage.
// Assumes parallelism by pixels of the result (non-padded E montage).
__global__ void kerCalcEnergy(
// out
PixIntens* nonpadded_e_montage, // montage of smaller (nonpadded) images of size ROI_w*ROI_h
// in
cufftDoubleComplex* padded_montage, // montage of complex
size_t padded_frame_offset,
size_t skipBig,
size_t padded_w,
size_t padded_h,
size_t roi_offset,
size_t roi_w,
size_t roi_h,
size_t skipSmall,
size_t nonpadded_w,
size_t nonpadded_h,
size_t kerside)
{
int c = threadIdx.x + blockIdx.x * blockDim.x;
int r = threadIdx.y + blockIdx.y * blockDim.y;
if (c >= roi_w || r >= roi_h)
if (c >= nonpadded_w || r >= nonpadded_h)
return;

size_t idx_p = r * padded_w + c + kerside / 2;
// for a future 3D implementation:
// size_t skipBig = k * padded_w * padded_h;
// size_t skipSmall = k * nonpadded_w * nonpadded_h;

double plen = (double)(padded_w * padded_h);
cufftDoubleComplex w = padded_montage[padded_frame_offset + idx_p];
size_t halfpad = kerside / 2;
size_t idxBig = (r + halfpad) * (nonpadded_w + kerside - 1) + c + halfpad;

double wx = w.x / plen,
wy = w.y / plen,
double bigLen = (double)(padded_w * padded_h);
cufftDoubleComplex w = padded_montage [skipBig + idxBig];

double wx = w.x / bigLen,
wy = w.y / bigLen,
e = sqrt(wx * wx + wy * wy);

size_t idx_roi = r * roi_w + c;
nonpadded_e_montage[roi_offset + idx_roi] = e;
size_t idxSmall = r * nonpadded_w + c;
nonpadded_e_montage [skipSmall + idxSmall] = e;
}

bool conv_dud_gpu_fft_multi_filter(
bool conv_dud_gpu_fft_multi_filter (
double* out,
const unsigned int* image,
double* kernel,
int image_n, int image_m,
int kernel_n, int kernel_m,
int image_w,
int image_h,
int kernel_w,
int kernel_h,
int n_filters,
double* dev_filterbank)
{
// calculate new size of image based on padding size
int row_size = image_m + kernel_m - 1;
int col_size = image_n + kernel_n - 1;
int row_size = image_h + kernel_h - 1;
int col_size = image_w + kernel_w - 1;
int size = row_size * col_size; // image+kernel size [pixels]

size_t bufsize = 2 * size * n_filters;
Expand All @@ -351,6 +359,7 @@ namespace CuGabor {
cufftDoubleComplex* d_kernel = NyxusGpu::gabor_linear_kernel.devbuffer;
cudaError_t ok;

// prepare the image
for (int batch = 0; batch < n_filters; ++batch)
{
size_t batch_idx = batch * size;
Expand All @@ -359,15 +368,16 @@ namespace CuGabor {
dim3 tpb(block, block);
dim3 bpg(ceil(row_size / block) + 1, ceil(col_size / block) + 1);

re_2_complex_img << <bpg, tpb >> > (d_image, row_size, col_size, image_m, image_n, NyxusGpu::dev_imat1, batch_idx);
re_2_complex_img << <bpg, tpb >> > (d_image, row_size, col_size, image_h, image_w, NyxusGpu::dev_imat1, batch_idx);

CHECKERR(cudaDeviceSynchronize());
CHECKERR(cudaGetLastError());
}

int n[2] = { row_size, col_size };

bool ok2 = dense_2_padded_filterbank(d_kernel, dev_filterbank, image_m, image_n, kernel_m, kernel_n, n_filters);
// prepare the kernel
bool ok2 = dense_2_padded_filterbank(d_kernel, dev_filterbank, image_h, image_w, kernel_h, kernel_w, n_filters);
if (!ok2)
{
std::cerr << "ERROR: CuGabor::dense_2_padded_filterbank failed \n";
Expand Down Expand Up @@ -415,56 +425,74 @@ namespace CuGabor {
CHECKCUFFTERR(call);

//
// no need to memcpy(result<-d_result) above ^^^ !
// no need to memcpy(result<-d_result) above
//

// convert the montage from complex padded layout ('d_result') to real non-padded energy ('NyxusGpu::gabor_energy_image.devbuffer')
for (int k = 0; k < n_filters; k++)
{
size_t padded_off = k * size,
roi_off = k * image_m * image_n;
roi_off = k * image_h * image_w;

int block = 16;
dim3 tpb(block, block);
dim3 bpg(ceil(image_m / block) + 1, ceil(image_n / block) + 1);

/*
// out
PixIntens* nonpadded_e_montage, // montage of smaller (nonpadded) images of size ROI_w*ROI_h
// in
cufftDoubleComplex* padded_montage, // montage of complex
size_t padded_frame_offset,
size_t padded_w,
size_t padded_h,
size_t roi_offset,
size_t roi_w,
size_t roi_h,
size_t kerside
*/
kerCalcEnergy << < bpg, tpb >> > (
dim3 bpg(ceil(image_w / block) + 1, ceil(image_h / block) + 1);

kerCalcEnergy <<< bpg, tpb >>> (
// out
NyxusGpu::gabor_energy_image.devbuffer,
// in
d_result,
padded_off,
row_size, // padded w
col_size, // padded h
col_size, // padded w
row_size, // padded h
roi_off,
image_m, // roi w
image_n, // roi h
kernel_n);
image_w, // roi w
image_h, // roi h
kernel_w);

CHECKERR(cudaDeviceSynchronize());
CHECKERR(cudaGetLastError());
}

// free device memory
cufftDestroy(plan);
cufftDestroy(plan_k);

// download energy on host
size_t szb = n_filters * image_m * image_n * sizeof(NyxusGpu::gabor_energy_image.hobuffer[0]);
size_t szb = n_filters * image_h * image_w * sizeof(NyxusGpu::gabor_energy_image.hobuffer[0]);
CHECKERR(cudaMemcpy(NyxusGpu::gabor_energy_image.hobuffer, NyxusGpu::gabor_energy_image.devbuffer, szb, cudaMemcpyDeviceToHost));

/* The above parallel code implements the following single - thread logic :
* (Converting big complex -> small energy)
*
* OK(NyxusGpu::gabor_result.download());
*
* for (int k = 0; k < n_filters; k++)
* {
* size_t skipBig = k * row_size * col_size;
* size_t skipSmall = k * image_m * image_n;
*
* for (int r = 0; r < image_m; r++)
* for (int c = 0; c < image_n; c++)
* {
* size_t halfpad = kernel_n / 2;
* size_t idxBig = (r + halfpad) * (image_n + kernel_n - 1) + c + halfpad;
*
* double bigLen = (double)(row_size * col_size);
* cufftDoubleComplex w = NyxusGpu::gabor_result.hobuffer[skipBig + idxBig];
*
* double wx = w.x / bigLen,
* wy = w.y / bigLen,
* e = sqrt (wx * wx + wy * wy);
*
* size_t idxSmall = r * image_n + c;
* NyxusGpu::gabor_energy_image.hobuffer [skipSmall + idxSmall] = e;
* }
* }
*
*/

// free device memory
cufftDestroy(plan);
cufftDestroy(plan_k);

return true;
}

Expand Down
10 changes: 10 additions & 0 deletions src/nyx/gpucache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,16 @@ bool GpuCache<cufftDoubleComplex>::alloc(size_t roi_buf_len, size_t num_rois__)
return true;
}

template<>
bool GpuCache<cufftDoubleComplex>::download()
{
size_t szb = total_len * sizeof(devbuffer[0]);
if (!NyxusGpu::download_on_host((void*)hobuffer, (void*)devbuffer, szb))
return false;

return true;
}

template<>
bool GpuCache<PixIntens>::clear()
{
Expand Down

0 comments on commit f7b7fe8

Please sign in to comment.