Skip to content

Commit

Permalink
Speed up
Browse files Browse the repository at this point in the history
Modify the C++ implementation of Recursive_Gaussian2D_Vertical, which results
in about a 15% speedup for MSRCP() and MSRCR().
  • Loading branch information
mawen1250 committed Nov 1, 2014
1 parent a67df07 commit c4c7b69
Showing 1 changed file with 36 additions and 21 deletions.
57 changes: 36 additions & 21 deletions source/Gaussian.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,37 +40,52 @@ void Recursive_Gaussian_Parameters(const double sigma, FLType & B, FLType & B1,

void Recursive_Gaussian2D_Vertical(FLType * output, const FLType * input, int height, int width, int stride, const FLType B, const FLType B1, const FLType B2, const FLType B3)
{
int i, j, lower, upper;
int i0, i1, i2, i3, j, lower, upper;
FLType P0, P1, P2, P3;
int pcount = stride*height;

for (j = 0; j < width; j++)
if (output != input)
{
lower = j;
upper = pcount;
memcpy(output, input, sizeof(FLType) * width);
}

i = lower;
output[i] = P3 = P2 = P1 = input[i];
for (j = 0; j < height; j++)
{
lower = stride * j;
upper = lower + width;

i0 = lower;
i1 = j < 1 ? i0 : i0 - stride;
i2 = j < 2 ? i1 : i1 - stride;
i3 = j < 3 ? i2 : i2 - stride;

for (i += stride; i < upper; i += stride)
for (; i0 < upper; i0++, i1++, i2++, i3++)
{
P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3;
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
P3 = output[i3];
P2 = output[i2];
P1 = output[i1];
P0 = input[i0];
output[i0] = B*P0 + B1*P1 + B2*P2 + B3*P3;
}
}

i -= stride;
P3 = P2 = P1 = output[i];
for (j = height - 1; j >= 0; j--)
{
lower = stride * j;
upper = lower + width;

i0 = lower;
i1 = j >= height - 1 ? i0 : i0 + stride;
i2 = j >= height - 2 ? i1 : i1 + stride;
i3 = j >= height - 3 ? i2 : i2 + stride;

for (i -= stride; i >= lower; i -= stride)
for (; i0 < upper; i0++, i1++, i2++, i3++)
{
P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3;
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
P3 = output[i3];
P2 = output[i2];
P1 = output[i1];
P0 = output[i0];
output[i0] = B*P0 + B1*P1 + B2*P2 + B3*P3;
}
}
}
Expand All @@ -82,7 +97,7 @@ void Recursive_Gaussian2D_Horizontal(FLType * output, const FLType * input, int

for (j = 0; j < height; j++)
{
lower = stride*j;
lower = stride * j;
upper = lower + width;

i = lower;
Expand Down

0 comments on commit c4c7b69

Please sign in to comment.