From 4602f9aab6290b74c1db182876868eea6ac3d484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albin=20Ahlb=C3=A4ck?= Date: Tue, 14 May 2024 01:05:57 +0200 Subject: [PATCH] Optimize n_revbin for Arm64 And remove byte_swap from longlong.h --- doc/source/longlong.rst | 8 ----- src/longlong.h | 28 ---------------- src/longlong_msc_arm64.h | 3 -- src/longlong_msc_x86.h | 6 ---- src/test/main.c | 2 -- src/test/t-byte_swap.c | 68 --------------------------------------- src/ulong_extras/revbin.c | 61 +++++++++++++++++++++++++++++++---- 7 files changed, 54 insertions(+), 122 deletions(-) delete mode 100644 src/test/t-byte_swap.c diff --git a/doc/source/longlong.rst b/doc/source/longlong.rst index e4b84253f2..3c6acd21cb 100644 --- a/doc/source/longlong.rst +++ b/doc/source/longlong.rst @@ -79,11 +79,3 @@ Division Works like ``udiv_qrnnd``, but takes a precomputed inverse ``di`` as computed by ::func::`n_preinvert_limb`. - -Miscellaneous -------------------------------------------------------------------------------- - -.. macro:: byte_swap(x) - - Swap the order of the bytes in the word `x`, i.e. most significant byte - becomes least significant byte, etc. 
diff --git a/src/longlong.h b/src/longlong.h index 285aec8b35..3fc7808923 100644 --- a/src/longlong.h +++ b/src/longlong.h @@ -34,11 +34,6 @@ extern "C" { # define flint_ctz __builtin_ctzl # endif -/* Byte swap */ -# define _FLINT_CAT_(X,Y) X##Y -# define _FLINT_CAT(X,Y) _FLINT_CAT_(X,Y) -# define byte_swap(x) do { (x) = _FLINT_CAT(__builtin_bswap, FLINT_BITS)(x); } while (0) - /* Addition, subtraction and multiplication */ # if defined(__clang__) # include "longlong_asm_clang.h" @@ -97,29 +92,6 @@ static inline int flint_ctz(ulong x) } #endif -/* Byte swap */ -#if !defined(byte_swap) -# if FLINT_BITS == 32 -# define byte_swap(n) \ - do { \ - /* swap adjacent bytes */ \ - (n) = ((((n) & 0xff00ff00) >> 8) | (((n) & 0x00ff00ff) << 8)); \ - /* swap adjacent words */ \ - (n) = (((n) >> 16) | ((n) << 16)); \ - } while (0) -# else -# define byte_swap(n) \ - do { \ - /* swap adjacent bytes */ \ - (n) = ((((n) & 0xff00ff00ff00ff00) >> 8) | (((n) & 0x00ff00ff00ff00ff) << 8)); \ - /* swap adjacent words */ \ - (n) = ((((n) & 0xffff0000ffff0000) >> 16) | (((n) & 0x0000ffff0000ffff) << 16)); \ - /* swap adjacent double words */ \ - (n) = (((n) >> 32) | ((n) << 32)); \ - } while (0) -# endif -#endif - /* Addition and subtraction */ #if !defined(add_ssaaaa) # define add_ssaaaa(s1, s0, a1, a0, b1, b0) \ diff --git a/src/longlong_msc_arm64.h b/src/longlong_msc_arm64.h index 045a8d96f4..2147484279 100644 --- a/src/longlong_msc_arm64.h +++ b/src/longlong_msc_arm64.h @@ -26,9 +26,6 @@ static inline int flint_ctz(ulong x) return index; } -/* Byte swap */ -# define byte_swap(x) do { (x) = _byteswap_uint64(x); } while (0) - /* Multiplication */ #define umul_ppmm(r1, r0, u, v) \ do \ diff --git a/src/longlong_msc_x86.h b/src/longlong_msc_x86.h index 9a718b1533..79aa353b64 100644 --- a/src/longlong_msc_x86.h +++ b/src/longlong_msc_x86.h @@ -22,9 +22,6 @@ # define flint_clz _lzcnt_u32 # define flint_ctz _tzcnt_u32 -/* Byte swap */ -# define byte_swap(x) do { (x) = _byteswap_ulong(x); } 
while (0) - /* Addition and subtraction */ # define _FLINT_ADC _addcarry_u32 # define _FLINT_SBB _subborrow_u32 @@ -56,9 +53,6 @@ do \ # define flint_clz _lzcnt_u64 # define flint_ctz _tzcnt_u64 -/* Byte swap */ -# define byte_swap(x) do { (x) = _byteswap_uint64(x); } while (0) - /* Addition and subtraction */ # define _FLINT_ADC _addcarry_u64 # define _FLINT_SBB _subborrow_u64 diff --git a/src/test/main.c b/src/test/main.c index b4e844489c..0243785ac4 100644 --- a/src/test/main.c +++ b/src/test/main.c @@ -14,7 +14,6 @@ #include "t-add_ssaaaa.c" #include "t-add_sssaaaaaa.c" #include "t-add_ssssaaaaaaaa.c" -#include "t-byte_swap.c" #include "t-flint_clz.c" #include "t-flint_ctz.c" #include "t-io.c" @@ -34,7 +33,6 @@ test_struct tests[] = TEST_FUNCTION(add_ssaaaa), TEST_FUNCTION(add_sssaaaaaa), TEST_FUNCTION(add_ssssaaaaaaaa), - TEST_FUNCTION(byte_swap), TEST_FUNCTION(flint_clz), TEST_FUNCTION(flint_ctz), TEST_FUNCTION(flint_fprintf), diff --git a/src/test/t-byte_swap.c b/src/test/t-byte_swap.c deleted file mode 100644 index 6d26fd8b18..0000000000 --- a/src/test/t-byte_swap.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (C) 2015 William Hart - - This file is part of FLINT. - - FLINT is free software: you can redistribute it and/or modify it under - the terms of the GNU Lesser General Public License (LGPL) as published - by the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. See <https://www.gnu.org/licenses/>. 
-*/ - -#include "ulong_extras.h" -#include "test_helpers.h" - -ulong byte_swap_naive(ulong n) -{ - ulong r = 0; - slong i; - - for (i = 0; i < sizeof(ulong); i++) - { - r <<= 8; - r |= (n & 0xFF); - n >>= 8; - } - - return r; -} - -TEST_FUNCTION_START(byte_swap, state) -{ - int i, result; - - for (i = 0; i < 10000 * flint_test_multiplier(); i++) - { - ulong n, r1, r2; - int cs; - - n = n_randtest(state); - r1 = n; - - cs = n_randint(state, 2); - - if (cs == 0) - { - /* byte_swap(byte_swap(n)) == n */ - r2 = n; - byte_swap(r2); - byte_swap(r2); - } - else - { - /* byte_swap(n) == byte_swap_naive(n) */ - r1 = n; - byte_swap(r1); - r2 = byte_swap_naive(n); - } - - result = (r1 == r2); - if (!result) - TEST_FUNCTION_FAIL( - "case %d\n" - "n = %wx, r1 = %wx, r2 = %wx\n", - n, r1, r2); - } - - TEST_FUNCTION_END(state); -} diff --git a/src/ulong_extras/revbin.c b/src/ulong_extras/revbin.c index fbb15d14a2..45482a2202 100644 --- a/src/ulong_extras/revbin.c +++ b/src/ulong_extras/revbin.c @@ -1,5 +1,6 @@ /* Copyright (C) 2009, 2015 William Hart + Copyright (C) 2024 Albin Ahlbäck This file is part of FLINT. @@ -9,9 +10,23 @@ (at your option) any later version. See <https://www.gnu.org/licenses/>. */ -#include "flint.h" #include "ulong_extras.h" +#if defined(__GNUC__) && FLINT64 && defined(__aarch64__) +# include <arm_acle.h> +ulong +n_revbin(ulong n, ulong b) +{ + FLINT_ASSERT(b <= FLINT_BITS); + + n = __rbitll(n); + + if (b == 0) + return 0; + else + return n >> (FLINT_BITS - b); +} +#else static const unsigned char flint_revtab[] = { 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, @@ -47,9 +62,42 @@ static const unsigned char flint_revtab[] = { 0x3F, 0xBF, 0x7F, 0xFF }; -/* - Computes the reverse binary representation of a number of b bits. 
-*/ +#if defined(__GNUC__) +# if FLINT64 +# define byte_swap __builtin_bswap64 +# else +# define byte_swap __builtin_bswap32 +# endif +#elif defined(_MSC_VER) +# include <stdlib.h> +# if FLINT64 +# define byte_swap _byteswap_uint64 +# else +# define byte_swap _byteswap_ulong +# endif +#else +# if FLINT64 +FLINT_FORCE_INLINE byte_swap(ulong n) +{ + /* swap adjacent bytes */ + n = ((n & 0xff00ff00ff00ff00) >> 8) | ((n & 0x00ff00ff00ff00ff) << 8); + /* swap adjacent words */ + n = ((n & 0xffff0000ffff0000) >> 16) | ((n & 0x0000ffff0000ffff) << 16); + /* swap adjacent double words */ + n = (n >> 32) | (n << 32); + return n; +} +# else +FLINT_FORCE_INLINE byte_swap(ulong n) +{ + /* swap adjacent bytes */ + n = ((n & 0xff00ff00) >> 8) | ((n & 0x00ff00ff) << 8); + /* swap adjacent words */ + n = (n >> 16) | (n << 16); + return n; +} +# endif +#endif ulong n_revbin(ulong n, ulong b) @@ -83,8 +131,7 @@ n_revbin(ulong n, ulong b) n = (((n & 0xf0f0f0f0) >> 4) | ((n & 0x0f0f0f0f) << 4)); #endif - byte_swap(n); - - return n >> (FLINT_BITS - b); + return byte_swap(n) >> (FLINT_BITS - b); } } +#endif