Reduce code duplication

HomeOfVapourSynthEvolution · Jul 5, 2020 · d0df8c2 · d0df8c2
1 parent fff2c99
commit d0df8c2
Show file tree

Hide file tree

Showing 4 changed files with 206 additions and 377 deletions.
diff --git a/CAS/CAS.cpp b/CAS/CAS.cpp
@@ -181,10 +181,10 @@ static void VS_CC casCreate(const VSMap * in, VSMap * out, void * userData, VSCo
 
         for (int plane = 0; plane < d->vi->format->numPlanes; plane++) {
             if (d->vi->width >> (plane ? d->vi->format->subSamplingW : 0) < 3)
-                throw "every plane's width must be greater than or equal to 3";
+                throw "plane's width must be greater than or equal to 3";
 
             if (d->vi->height >> (plane ? d->vi->format->subSamplingH : 0) < 3)
-                throw "every plane's height must be greater than or equal to 3";
+                throw "plane's height must be greater than or equal to 3";
         }
 
         d->sharpness = static_cast<float>(vsapi->propGetFloat(in, "sharpness", 0, &err));

diff --git a/CAS/CAS_AVX2.cpp b/CAS/CAS_AVX2.cpp
@@ -3,42 +3,45 @@
 
 template<typename pixel_t>
 void filter_avx2(const VSFrameRef * src, VSFrameRef * dst, const CASData * const VS_RESTRICT data, const VSAPI * vsapi) noexcept {
-    auto load_8u = [](const void * srcp) noexcept {
+    using var_t = std::conditional_t<std::is_integral_v<pixel_t>, int, float>;
+    using vec_t = std::conditional_t<std::is_integral_v<pixel_t>, Vec8i, Vec8f>;
+
+    const vec_t limit = std::any_cast<var_t>(data->limit);
+
+    auto load = [](const pixel_t * srcp) noexcept {
         if constexpr (std::is_same_v<pixel_t, uint8_t>)
-            return Vec8i().load_8uc(srcp);
+            return vec_t().load_8uc(srcp);
+        else if constexpr (std::is_same_v<pixel_t, uint16_t>)
+            return vec_t().load_8us(srcp);
         else
-            return Vec8i().load_8us(srcp);
+            return vec_t().load(srcp);
     };
 
-    auto store_8u = [&](const Vec8f __result, void * dstp) noexcept {
-        const Vec8i _result = truncatei(__result + 0.5f);
-
+    auto store = [&](const Vec8f srcp, pixel_t * dstp) noexcept {
         if constexpr (std::is_same_v<pixel_t, uint8_t>) {
-            const auto result = compress_saturated_s2u(compress_saturated(_result, zero_si256()), zero_si256()).get_low();
+            const auto result = compress_saturated_s2u(compress_saturated(truncatei(srcp + 0.5f), zero_si256()), zero_si256()).get_low();
             result.storel(dstp);
-        } else {
-            const auto result = compress_saturated_s2u(_result, zero_si256()).get_low();
+        } else if constexpr (std::is_same_v<pixel_t, uint16_t>) {
+            const auto result = compress_saturated_s2u(truncatei(srcp + 0.5f), zero_si256()).get_low();
             min(result, data->peak).store_nt(dstp);
+        } else {
+            srcp.store_nt(dstp);
         }
     };
 
-    using var_t = std::conditional_t<std::is_integral_v<pixel_t>, Vec8i, Vec8f>;
-
-    const var_t limit = std::any_cast<std::conditional_t<std::is_integral_v<pixel_t>, int, float>>(data->limit);
-
-    auto filtering = [&](const var_t a, const var_t b, const var_t c, const var_t d, const var_t e, const var_t f, const var_t g, const var_t h, const var_t i,
+    auto filtering = [&](const vec_t a, const vec_t b, const vec_t c, const vec_t d, const vec_t e, const vec_t f, const vec_t g, const vec_t h, const vec_t i,
                          const Vec8f chromaOffset) noexcept {
         // Soft min and max.
         //  a b c             b
         //  d e f * 0.5  +  d e f * 0.5
         //  g h i             h
         // These are 2.0x bigger (factored out the extra multiply).
-        var_t mn = min(min(min(d, e), min(f, b)), h);
-        const var_t mn2 = min(min(min(mn, a), min(c, g)), i);
+        vec_t mn = min(min(min(d, e), min(f, b)), h);
+        const vec_t mn2 = min(min(min(mn, a), min(c, g)), i);
         mn += mn2;
 
-        var_t mx = max(max(max(d, e), max(f, b)), h);
-        const var_t mx2 = max(max(max(mx, a), max(c, g)), i);
+        vec_t mx = max(max(max(d, e), max(f, b)), h);
+        const vec_t mx2 = max(max(max(mx, a), max(c, g)), i);
         mx += mx2;
 
         if constexpr (std::is_floating_point_v<pixel_t>) {
@@ -77,128 +80,68 @@ void filter_avx2(const VSFrameRef * src, VSFrameRef * dst, const CASData * const
 
             const Vec8f chromaOffset = plane ? 1.0f : 0.0f;
 
-            const int regularPart = (width - 1) & ~(Vec8i().size() - 1);
+            const int regularPart = (width - 1) & ~(vec_t().size() - 1);
 
             for (int y = 0; y < height; y++) {
                 const pixel_t * above = srcp + (y == 0 ? stride : -stride);
                 const pixel_t * below = srcp + (y == height - 1 ? -stride : stride);
 
-                if constexpr (std::is_integral_v<pixel_t>) {
-                    {
-                        const Vec8i b = load_8u(above + 0);
-                        const Vec8i e = load_8u(srcp + 0);
-                        const Vec8i h = load_8u(below + 0);
-
-                        const Vec8i a = permute8<1, 0, 1, 2, 3, 4, 5, 6>(b);
-                        const Vec8i d = permute8<1, 0, 1, 2, 3, 4, 5, 6>(e);
-                        const Vec8i g = permute8<1, 0, 1, 2, 3, 4, 5, 6>(h);
-
-                        Vec8i c, f, i;
-                        if (width > Vec8i().size()) {
-                            c = load_8u(above + 1);
-                            f = load_8u(srcp + 1);
-                            i = load_8u(below + 1);
-                        } else {
-                            c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
-                            f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
-                            i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
-                        }
-
-                        const Vec8f result = filtering(a, b, c,
-                                                       d, e, f,
-                                                       g, h, i,
-                                                       chromaOffset);
-
-                        store_8u(result, dstp + 0);
-                    }
-
-                    for (int x = Vec8i().size(); x < regularPart; x += Vec8i().size()) {
-                        const Vec8f result = filtering(load_8u(above + x - 1), load_8u(above + x), load_8u(above + x + 1),
-                                                       load_8u(srcp + x - 1), load_8u(srcp + x), load_8u(srcp + x + 1),
-                                                       load_8u(below + x - 1), load_8u(below + x), load_8u(below + x + 1),
-                                                       chromaOffset);
-
-                        store_8u(result, dstp + x);
+                {
+                    const vec_t b = load(above + 0);
+                    const vec_t e = load(srcp + 0);
+                    const vec_t h = load(below + 0);
+
+                    const vec_t a = permute8<1, 0, 1, 2, 3, 4, 5, 6>(b);
+                    const vec_t d = permute8<1, 0, 1, 2, 3, 4, 5, 6>(e);
+                    const vec_t g = permute8<1, 0, 1, 2, 3, 4, 5, 6>(h);
+
+                    vec_t c, f, i;
+                    if (width > vec_t().size()) {
+                        c = load(above + 1);
+                        f = load(srcp + 1);
+                        i = load(below + 1);
+                    } else {
+                        c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
+                        f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
+                        i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
                     }
 
-                    if (regularPart >= Vec8i().size()) {
-                        const Vec8i a = load_8u(above + regularPart - 1);
-                        const Vec8i d = load_8u(srcp + regularPart - 1);
-                        const Vec8i g = load_8u(below + regularPart - 1);
+                    const Vec8f result = filtering(a, b, c,
+                                                   d, e, f,
+                                                   g, h, i,
+                                                   chromaOffset);
 
-                        const Vec8i b = load_8u(above + regularPart);
-                        const Vec8i e = load_8u(srcp + regularPart);
-                        const Vec8i h = load_8u(below + regularPart);
-
-                        const Vec8i c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
-                        const Vec8i f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
-                        const Vec8i i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
-
-                        const Vec8f result = filtering(a, b, c,
-                                                       d, e, f,
-                                                       g, h, i,
-                                                       chromaOffset);
-
-                        store_8u(result, dstp + regularPart);
-                    }
-                } else {
-                    {
-                        const Vec8f b = Vec8f().load_a(above + 0);
-                        const Vec8f e = Vec8f().load_a(srcp + 0);
-                        const Vec8f h = Vec8f().load_a(below + 0);
-
-                        const Vec8f a = permute8<1, 0, 1, 2, 3, 4, 5, 6>(b);
-                        const Vec8f d = permute8<1, 0, 1, 2, 3, 4, 5, 6>(e);
-                        const Vec8f g = permute8<1, 0, 1, 2, 3, 4, 5, 6>(h);
-
-                        Vec8f c, f, i;
-                        if (width > Vec8f().size()) {
-                            c = Vec8f().load(above + 1);
-                            f = Vec8f().load(srcp + 1);
-                            i = Vec8f().load(below + 1);
-                        } else {
-                            c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
-                            f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
-                            i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
-                        }
-
-                        const Vec8f result = filtering(a, b, c,
-                                                       d, e, f,
-                                                       g, h, i,
-                                                       chromaOffset);
-
-                        result.store_nt(dstp + 0);
-                    }
+                    store(result, dstp + 0);
+                }
 
-                    for (int x = Vec8f().size(); x < regularPart; x += Vec8f().size()) {
-                        const Vec8f result = filtering(Vec8f().load(above + x - 1), Vec8f().load_a(above + x), Vec8f().load(above + x + 1),
-                                                       Vec8f().load(srcp + x - 1), Vec8f().load_a(srcp + x), Vec8f().load(srcp + x + 1),
-                                                       Vec8f().load(below + x - 1), Vec8f().load_a(below + x), Vec8f().load(below + x + 1),
-                                                       chromaOffset);
+                for (int x = vec_t().size(); x < regularPart; x += vec_t().size()) {
+                    const Vec8f result = filtering(load(above + x - 1), load(above + x), load(above + x + 1),
+                                                   load(srcp + x - 1), load(srcp + x), load(srcp + x + 1),
+                                                   load(below + x - 1), load(below + x), load(below + x + 1),
+                                                   chromaOffset);
 
-                        result.store_nt(dstp + x);
-                    }
+                    store(result, dstp + x);
+                }
 
-                    if (regularPart >= Vec8f().size()) {
-                        const Vec8f a = Vec8f().load(above + regularPart - 1);
-                        const Vec8f d = Vec8f().load(srcp + regularPart - 1);
-                        const Vec8f g = Vec8f().load(below + regularPart - 1);
+                if (regularPart >= vec_t().size()) {
+                    const vec_t a = load(above + regularPart - 1);
+                    const vec_t d = load(srcp + regularPart - 1);
+                    const vec_t g = load(below + regularPart - 1);
 
-                        const Vec8f b = Vec8f().load_a(above + regularPart);
-                        const Vec8f e = Vec8f().load_a(srcp + regularPart);
-                        const Vec8f h = Vec8f().load_a(below + regularPart);
+                    const vec_t b = load(above + regularPart);
+                    const vec_t e = load(srcp + regularPart);
+                    const vec_t h = load(below + regularPart);
 
-                        const Vec8f c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
-                        const Vec8f f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
-                        const Vec8f i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
+                    const vec_t c = permute8<1, 2, 3, 4, 5, 6, 7, 6>(b);
+                    const vec_t f = permute8<1, 2, 3, 4, 5, 6, 7, 6>(e);
+                    const vec_t i = permute8<1, 2, 3, 4, 5, 6, 7, 6>(h);
 
-                        const Vec8f result = filtering(a, b, c,
-                                                       d, e, f,
-                                                       g, h, i,
-                                                       chromaOffset);
+                    const Vec8f result = filtering(a, b, c,
+                                                   d, e, f,
+                                                   g, h, i,
+                                                   chromaOffset);
 
-                        result.store_nt(dstp + regularPart);
-                    }
+                    store(result, dstp + regularPart);
                 }
 
                 srcp += stride;