Skip to content

Commit

Permalink
Optimize n_revbin for Arm64
Browse files Browse the repository at this point in the history
And remove byte_swap from longlong.h
  • Loading branch information
albinahlback committed May 14, 2024
1 parent e8ed578 commit 4602f9a
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 122 deletions.
8 changes: 0 additions & 8 deletions doc/source/longlong.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,3 @@ Division

Works like ``udiv_qrnnd``, but takes a precomputed inverse ``di`` as
computed by :func:`n_preinvert_limb`.

Miscellaneous
-------------------------------------------------------------------------------

.. macro:: byte_swap(x)

Swap the order of the bytes in the word `x`, i.e. most significant byte
becomes least significant byte, etc.
28 changes: 0 additions & 28 deletions src/longlong.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ extern "C" {
# define flint_ctz __builtin_ctzl
# endif

/* Byte swap: byte_swap(x) overwrites the lvalue x with its byte-reversed
   value, using a compiler builtin.  The two-level _FLINT_CAT indirection
   lets FLINT_BITS be macro-expanded before it is pasted onto
   __builtin_bswap, so the builtin's name ends in the value of FLINT_BITS.
   Note that x appears twice in the expansion. */
# define _FLINT_CAT_(X,Y) X##Y
# define _FLINT_CAT(X,Y) _FLINT_CAT_(X,Y)
# define byte_swap(x) do { (x) = _FLINT_CAT(__builtin_bswap, FLINT_BITS)(x); } while (0)

/* Addition, subtraction and multiplication */
# if defined(__clang__)
# include "longlong_asm_clang.h"
Expand Down Expand Up @@ -97,29 +92,6 @@ static inline int flint_ctz(ulong x)
}
#endif

/* Byte swap: portable fallback, only defined when no byte_swap macro exists
   yet.  byte_swap(x) reverses the byte order of the lvalue x in place by
   exchanging progressively wider neighbouring groups.  The argument is
   expanded several times, so it must not have side effects. */
#if !defined(byte_swap)
# if FLINT_BITS != 32
# define byte_swap(x) \
do { \
/* exchange the two bytes inside every 16-bit group */ \
(x) = ((((x) & 0x00ff00ff00ff00ff) << 8) | (((x) & 0xff00ff00ff00ff00) >> 8)); \
/* exchange the two 16-bit groups inside every 32-bit group */ \
(x) = ((((x) & 0x0000ffff0000ffff) << 16) | (((x) & 0xffff0000ffff0000) >> 16)); \
/* exchange the two 32-bit halves */ \
(x) = (((x) << 32) | ((x) >> 32)); \
} while (0)
# else
# define byte_swap(x) \
do { \
/* exchange the two bytes inside every 16-bit group */ \
(x) = ((((x) & 0x00ff00ff) << 8) | (((x) & 0xff00ff00) >> 8)); \
/* exchange the two 16-bit halves */ \
(x) = (((x) << 16) | ((x) >> 16)); \
} while (0)
# endif
#endif

/* Addition and subtraction */
#if !defined(add_ssaaaa)
# define add_ssaaaa(s1, s0, a1, a0, b1, b0) \
Expand Down
3 changes: 0 additions & 3 deletions src/longlong_msc_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ static inline int flint_ctz(ulong x)
return index;
}

/* Byte swap: byte_swap(x) overwrites the lvalue x with the result of
   _byteswap_uint64(x).  x appears twice in the expansion. */
# define byte_swap(x) do { (x) = _byteswap_uint64(x); } while (0)

/* Multiplication */
#define umul_ppmm(r1, r0, u, v) \
do \
Expand Down
6 changes: 0 additions & 6 deletions src/longlong_msc_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
# define flint_clz _lzcnt_u32
# define flint_ctz _tzcnt_u32

/* Byte swap: byte_swap(x) overwrites the lvalue x with the result of
   _byteswap_ulong(x).  x appears twice in the expansion. */
# define byte_swap(x) do { (x) = _byteswap_ulong(x); } while (0)

/* Addition and subtraction */
# define _FLINT_ADC _addcarry_u32
# define _FLINT_SBB _subborrow_u32
Expand Down Expand Up @@ -56,9 +53,6 @@ do \
# define flint_clz _lzcnt_u64
# define flint_ctz _tzcnt_u64

/* Byte swap: byte_swap(x) overwrites the lvalue x with the result of
   _byteswap_uint64(x).  x appears twice in the expansion. */
# define byte_swap(x) do { (x) = _byteswap_uint64(x); } while (0)

/* Addition and subtraction */
# define _FLINT_ADC _addcarry_u64
# define _FLINT_SBB _subborrow_u64
Expand Down
2 changes: 0 additions & 2 deletions src/test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "t-add_ssaaaa.c"
#include "t-add_sssaaaaaa.c"
#include "t-add_ssssaaaaaaaa.c"
#include "t-byte_swap.c"
#include "t-flint_clz.c"
#include "t-flint_ctz.c"
#include "t-io.c"
Expand All @@ -34,7 +33,6 @@ test_struct tests[] =
TEST_FUNCTION(add_ssaaaa),
TEST_FUNCTION(add_sssaaaaaa),
TEST_FUNCTION(add_ssssaaaaaaaa),
TEST_FUNCTION(byte_swap),
TEST_FUNCTION(flint_clz),
TEST_FUNCTION(flint_ctz),
TEST_FUNCTION(flint_fprintf),
Expand Down
68 changes: 0 additions & 68 deletions src/test/t-byte_swap.c

This file was deleted.

61 changes: 54 additions & 7 deletions src/ulong_extras/revbin.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
Copyright (C) 2009, 2015 William Hart
Copyright (C) 2024 Albin Ahlbäck
This file is part of FLINT.
Expand All @@ -9,9 +10,23 @@
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/

#include "flint.h"
#include "ulong_extras.h"

#if defined(__GNUC__) && FLINT64 && defined(__aarch64__)
# include <arm_acle.h>
/*
    Returns the lowest b bits of n in reversed order, using the bit-reverse
    intrinsic __rbitll.  Requires b <= FLINT_BITS (checked by FLINT_ASSERT);
    the result is 0 when b is 0.
*/
ulong
n_revbin(ulong n, ulong b)
{
    ulong reversed;

    FLINT_ASSERT(b <= FLINT_BITS);

    /* b == 0 is answered directly, so the shift below is never by FLINT_BITS */
    if (b == 0)
        return 0;

    /* reverse the full word, then move the b reversed bits down to the bottom */
    reversed = __rbitll(n);
    return reversed >> (FLINT_BITS - b);
}
#else
static const unsigned char flint_revtab[] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
0x30, 0xB0, 0x70, 0xF0,
Expand Down Expand Up @@ -47,9 +62,42 @@ static const unsigned char flint_revtab[] = {
0x3F, 0xBF, 0x7F, 0xFF
};

/*
Computes the reverse binary representation of a number of b bits.
*/
/*
    byte_swap(n) evaluates to n with the order of its bytes reversed; it is
    used by n_revbin in this file.  The implementation is chosen as follows:
      - compilers defining __GNUC__: the __builtin_bswap64 / __builtin_bswap32
        builtins, selected by FLINT64;
      - compilers defining _MSC_VER: _byteswap_uint64 / _byteswap_ulong from
        <stdlib.h>, selected by FLINT64;
      - anything else: a portable shift-and-mask function.
    The portable functions must spell out their ulong return type: omitting
    it would declare them as returning int, which is not valid since C99 and
    would not hold a full 64-bit result.
*/
#if defined(__GNUC__)
# if FLINT64
# define byte_swap __builtin_bswap64
# else
# define byte_swap __builtin_bswap32
# endif
#elif defined(_MSC_VER)
# include <stdlib.h>
# if FLINT64
# define byte_swap _byteswap_uint64
# else
# define byte_swap _byteswap_ulong
# endif
#else
# if FLINT64
/* Portable byte swap for 64-bit words: returns n with its 8 bytes reversed. */
FLINT_FORCE_INLINE ulong byte_swap(ulong n)
{
    /* swap adjacent bytes */
    n = ((n & 0xff00ff00ff00ff00) >> 8) | ((n & 0x00ff00ff00ff00ff) << 8);
    /* swap adjacent words */
    n = ((n & 0xffff0000ffff0000) >> 16) | ((n & 0x0000ffff0000ffff) << 16);
    /* swap adjacent double words */
    n = (n >> 32) | (n << 32);
    return n;
}
# else
/* Portable byte swap for 32-bit words: returns n with its 4 bytes reversed. */
FLINT_FORCE_INLINE ulong byte_swap(ulong n)
{
    /* swap adjacent bytes */
    n = ((n & 0xff00ff00) >> 8) | ((n & 0x00ff00ff) << 8);
    /* swap adjacent words */
    n = (n >> 16) | (n << 16);
    return n;
}
# endif
#endif

ulong
n_revbin(ulong n, ulong b)
Expand Down Expand Up @@ -83,8 +131,7 @@ n_revbin(ulong n, ulong b)
n = (((n & 0xf0f0f0f0) >> 4) | ((n & 0x0f0f0f0f) << 4));
#endif

byte_swap(n);

return n >> (FLINT_BITS - b);
return byte_swap(n) >> (FLINT_BITS - b);
}
}
#endif

0 comments on commit 4602f9a

Please sign in to comment.