From c4c7b696441d476375641bce1cdbc4254e9575b8 Mon Sep 17 00:00:00 2001 From: mawen1250 Date: Sat, 1 Nov 2014 09:19:26 +0800 Subject: [PATCH] Speed up Modify the C++ implementation of Recursive_Gaussian2D_Vertical, which results in about a 15% speed-up for MSRCP() and MSRCR(). --- source/Gaussian.cpp | 57 ++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/source/Gaussian.cpp b/source/Gaussian.cpp index 3d67c44..c5d7c5b 100644 --- a/source/Gaussian.cpp +++ b/source/Gaussian.cpp @@ -40,37 +40,52 @@ void Recursive_Gaussian_Parameters(const double sigma, FLType & B, FLType & B1, void Recursive_Gaussian2D_Vertical(FLType * output, const FLType * input, int height, int width, int stride, const FLType B, const FLType B1, const FLType B2, const FLType B3) { - int i, j, lower, upper; + int i0, i1, i2, i3, j, lower, upper; FLType P0, P1, P2, P3; int pcount = stride*height; - for (j = 0; j < width; j++) + if (output != input) { - lower = j; - upper = pcount; + memcpy(output, input, sizeof(FLType) * width); + } - i = lower; - output[i] = P3 = P2 = P1 = input[i]; + for (j = 0; j < height; j++) + { + lower = stride * j; + upper = lower + width; + + i0 = lower; + i1 = j < 1 ? i0 : i0 - stride; + i2 = j < 2 ? i1 : i1 - stride; + i3 = j < 3 ? i2 : i2 - stride; - for (i += stride; i < upper; i += stride) + for (; i0 < upper; i0++, i1++, i2++, i3++) { - P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3; - P3 = P2; - P2 = P1; - P1 = P0; - output[i] = P0; + P3 = output[i3]; + P2 = output[i2]; + P1 = output[i1]; + P0 = input[i0]; + output[i0] = B*P0 + B1*P1 + B2*P2 + B3*P3; } + } - i -= stride; - P3 = P2 = P1 = output[i]; + for (j = height - 1; j >= 0; j--) + { + lower = stride * j; + upper = lower + width; + + i0 = lower; + i1 = j >= height - 1 ? i0 : i0 + stride; + i2 = j >= height - 2 ? i1 : i1 + stride; + i3 = j >= height - 3 ? 
i2 : i2 + stride; - for (i -= stride; i >= lower; i -= stride) + for (; i0 < upper; i0++, i1++, i2++, i3++) { - P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3; - P3 = P2; - P2 = P1; - P1 = P0; - output[i] = P0; + P3 = output[i3]; + P2 = output[i2]; + P1 = output[i1]; + P0 = output[i0]; + output[i0] = B*P0 + B1*P1 + B2*P2 + B3*P3; } } } @@ -82,7 +97,7 @@ void Recursive_Gaussian2D_Horizontal(FLType * output, const FLType * input, int for (j = 0; j < height; j++) { - lower = stride*j; + lower = stride * j; upper = lower + width; i = lower;