Skip to content

Commit

Permalink
Add macros in crt_helpers.h for MSVC
Browse files Browse the repository at this point in the history
  • Loading branch information
albinahlback committed Nov 17, 2023
1 parent 22c836b commit 2788b2a
Showing 1 changed file with 75 additions and 2 deletions.
77 changes: 75 additions & 2 deletions src/crt_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ FLINT_FORCE_INLINE unsigned char _subborrow_ulong(unsigned char cf, ulong x, ulo

#if 1

#if defined(__AVX2__)
#if defined(__GNUC__) && defined(__AVX2__)

#define add_sssssaaaaaaaaaa(s4,s3,s2,s1,s0, a4,a3,a2,a1,a0, b4,b3,b2,b1,b0) \
__asm__ ("addq %14,%q4\n\tadcq %12,%q3\n\tadcq %10,%q2\n\tadcq %8,%q1\n\tadcq %6,%q0" \
Expand Down Expand Up @@ -187,7 +187,7 @@ FLINT_FORCE_INLINE unsigned char _subborrow_ulong(unsigned char cf, ulong x, ulo
"6" ((mp_limb_t)(a1)), "rme" ((mp_limb_t)(b1)), \
"7" ((mp_limb_t)(a0)), "rme" ((mp_limb_t)(b0)))

#elif defined(__ARM_NEON)
#elif defined(__GNUC__) && defined(__ARM_NEON)

#define add_sssssaaaaaaaaaa(s4, s3, s2, s1, s0, a4, a3, a2, a1, a0, b4, b3, b2, b1, b0) \
__asm__ ("adds %4,%9,%14\n\tadcs %3,%8,%13\n\tadcs %2,%7,%12\n\tadcs %1,%6,%11\n\tadc %0,%5,%10"\
Expand Down Expand Up @@ -253,6 +253,79 @@ FLINT_FORCE_INLINE unsigned char _subborrow_ulong(unsigned char cf, ulong x, ulo
"r" ((mp_limb_t)(b7)), "r" ((mp_limb_t)(b6)), "r" ((mp_limb_t)(b5)), "r" ((mp_limb_t)(b4)), "r" ((mp_limb_t)(b3)), "r" ((mp_limb_t)(b2)), "r" ((mp_limb_t)(b1)), "rI" ((mp_limb_t)(b0)) \
: "cc")

#elif defined(_MSC_VER) && (defined(__AVX2__) || defined(_M_ARM64))
/* Five-limb addition: (s4,...,s0) = (a4,...,a0) + (b4,...,b0), with limbs
   listed most significant first.  The three low limbs are summed through
   add_ssssaaaaaaaa, whose extra top output (fed with zero operands) receives
   their carry-out; the two high limbs are summed with add_ssaaaa and the
   saved carry is then added in.  This presumes add_ssaaaa and
   add_ssssaaaaaaaa are plain two- and four-limb additions, as their use here
   suggests; a carry out of s4 is not returned.

   s2, s1 and s0 are stored before a4, a3, b4 and b3 are read, so an output
   must not alias a higher-positioned input.

   The temporary carries a name unique to this macro.  These helpers expand
   inside one another and the wider ones hand their own temporary in as s4; a
   local with the same spelling declared here would capture that argument, so
   the caller's variable would never be written. */
#define add_sssssaaaaaaaaaa(s4, s3, s2, s1, s0, a4, a3, a2, a1, a0, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __add5_cy; \
    add_ssssaaaaaaaa(__add5_cy, s2, s1, s0, (mp_limb_t) 0, a2, a1, a0, (mp_limb_t) 0, b2, b1, b0); \
    add_ssaaaa(s4, s3, a4, a3, b4, b3); \
    add_ssaaaa(s4, s3, s4, s3, (mp_limb_t) 0, __add5_cy); \
  } while (0)

/* Six-limb addition: (s5,...,s0) = (a5,...,a0) + (b5,...,b0), with limbs
   listed most significant first.  The four low limbs go through
   add_sssssaaaaaaaaaa with zero top operands, so its top output is their
   carry-out; the two high limbs are summed with add_ssaaaa and that carry is
   added afterwards.  add_ssaaaa is presumed to be a plain two-limb addition;
   a carry out of s5 is not returned.

   s3..s0 are stored before a5, a4, b5 and b4 are read, so an output must not
   alias a higher-positioned input.

   The temporary must not be spelled like the one inside
   add_sssssaaaaaaaaaa: that macro declares its own local in a nested block,
   and an argument with the same name would bind to the inner variable,
   leaving this one uninitialised when it is read below. */
#define add_ssssssaaaaaaaaaaaa(s5, s4, s3, s2, s1, s0, a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __add6_cy; \
    add_sssssaaaaaaaaaa(__add6_cy, s3, s2, s1, s0, (mp_limb_t) 0, a3, a2, a1, a0, (mp_limb_t) 0, b3, b2, b1, b0); \
    add_ssaaaa(s5, s4, a5, a4, b5, b4); \
    add_ssaaaa(s5, s4, s5, s4, (mp_limb_t) 0, __add6_cy); \
  } while (0)

/* Seven-limb addition: (s6,...,s0) = (a6,...,a0) + (b6,...,b0), with limbs
   listed most significant first.  The five low limbs go through
   add_ssssssaaaaaaaaaaaa with zero top operands, so its top output is their
   carry-out; the two high limbs are summed with add_ssaaaa and that carry is
   added afterwards.  add_ssaaaa is presumed to be a plain two-limb addition;
   a carry out of s6 is not returned.

   s4..s0 are stored before a6, a5, b6 and b5 are read, so an output must not
   alias a higher-positioned input.

   The temporary must not be spelled like the one inside
   add_ssssssaaaaaaaaaaaa: that macro declares its own local in a nested
   block, and an argument with the same name would bind to the inner
   variable, leaving this one uninitialised when it is read below. */
#define add_sssssssaaaaaaaaaaaaaa(s6, s5, s4, s3, s2, s1, s0, a6, a5, a4, a3, a2, a1, a0, b6, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __add7_cy; \
    add_ssssssaaaaaaaaaaaa(__add7_cy, s4, s3, s2, s1, s0, (mp_limb_t) 0, a4, a3, a2, a1, a0, (mp_limb_t) 0, b4, b3, b2, b1, b0); \
    add_ssaaaa(s6, s5, a6, a5, b6, b5); \
    add_ssaaaa(s6, s5, s6, s5, (mp_limb_t) 0, __add7_cy); \
  } while (0)

/* Eight-limb addition: (s7,...,s0) = (a7,...,a0) + (b7,...,b0), with limbs
   listed most significant first.  The six low limbs go through
   add_sssssssaaaaaaaaaaaaaa with zero top operands, so its top output is
   their carry-out; the two high limbs are summed with add_ssaaaa and that
   carry is added afterwards.  add_ssaaaa is presumed to be a plain two-limb
   addition; a carry out of s7 is not returned.

   s5..s0 are stored before a7, a6, b7 and b6 are read, so an output must not
   alias a higher-positioned input.

   The temporary must not be spelled like the one inside
   add_sssssssaaaaaaaaaaaaaa: that macro declares its own local in a nested
   block, and an argument with the same name would bind to the inner
   variable, leaving this one uninitialised when it is read below. */
#define add_ssssssssaaaaaaaaaaaaaaaa(s7, s6, s5, s4, s3, s2, s1, s0, a7, a6, a5, a4, a3, a2, a1, a0, b7, b6, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __add8_cy; \
    add_sssssssaaaaaaaaaaaaaa(__add8_cy, s5, s4, s3, s2, s1, s0, (mp_limb_t) 0, a5, a4, a3, a2, a1, a0, (mp_limb_t) 0, b5, b4, b3, b2, b1, b0); \
    add_ssaaaa(s7, s6, a7, a6, b7, b6); \
    add_ssaaaa(s7, s6, s7, s6, (mp_limb_t) 0, __add8_cy); \
  } while (0)

/* Four-limb subtraction: (s3,...,s0) = (a3,...,a0) - (b3,...,b0), with limbs
   listed most significant first.

   __sub4_t receives the top output of sub_dddmmmsss on the two low limbs
   with zero top operands, and __sub4_u the top output of sub_ddmmss on limb
   2 alone.  Assuming those helpers are plain multi-limb subtractions, each
   temporary is 0 or all ones, so its negation is the 0/1 borrow.  The final
   sub_ddmmss takes the low borrow out of s2 and the limb-2 borrow (plus any
   new one) out of a3 - b3.  A borrow out of s3 is not returned.

   s2, s1 and s0 are stored before a3 and b3 are read, so an output must not
   alias a higher-positioned input.

   The temporaries carry names unique to this macro.  These helpers expand
   inside one another and the wider ones hand their own temporary in as s3;
   a local with the same spelling declared here would capture that argument,
   so the caller's variable would never be written. */
#define sub_ddddmmmmssss(s3, s2, s1, s0, a3, a2, a1, a0, b3, b2, b1, b0) \
  do { \
    mp_limb_t __sub4_t, __sub4_u; \
    sub_dddmmmsss(__sub4_t, s1, s0, (mp_limb_t) 0, a1, a0, (mp_limb_t) 0, b1, b0); \
    sub_ddmmss(__sub4_u, s2, (mp_limb_t) 0, a2, (mp_limb_t) 0, b2); \
    sub_ddmmss(s3, s2, (a3) - (b3), s2, -__sub4_u, -__sub4_t); \
  } while (0)

/* Five-limb subtraction: (s4,...,s0) = (a4,...,a0) - (b4,...,b0), with limbs
   listed most significant first.

   __sub5_t receives the top output of sub_ddddmmmmssss on the three low
   limbs with zero top operands, and __sub5_u the top output of sub_ddmmss on
   limb 3 alone.  Assuming sub_ddmmss is a plain two-limb subtraction, each
   temporary is 0 or all ones, so its negation is the 0/1 borrow.  The final
   sub_ddmmss takes the low borrow out of s3 and the limb-3 borrow (plus any
   new one) out of a4 - b4.  A borrow out of s4 is not returned.

   s3..s0 are stored before a4 and b4 are read, so an output must not alias
   a higher-positioned input.

   The temporaries must not be spelled like the ones inside
   sub_ddddmmmmssss: that macro declares its own locals in a nested block,
   and an argument with the same name would bind to the inner variable,
   leaving the one declared here uninitialised when it is read below. */
#define sub_dddddmmmmmsssss(s4, s3, s2, s1, s0, a4, a3, a2, a1, a0, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __sub5_t, __sub5_u; \
    sub_ddddmmmmssss(__sub5_t, s2, s1, s0, (mp_limb_t) 0, a2, a1, a0, (mp_limb_t) 0, b2, b1, b0); \
    sub_ddmmss(__sub5_u, s3, (mp_limb_t) 0, a3, (mp_limb_t) 0, b3); \
    sub_ddmmss(s4, s3, (a4) - (b4), s3, -__sub5_u, -__sub5_t); \
  } while (0)

/* Six-limb subtraction: (s5,...,s0) = (a5,...,a0) - (b5,...,b0), with limbs
   listed most significant first.

   __sub6_t receives the top output of sub_dddddmmmmmsssss on the four low
   limbs with zero top operands, and __sub6_u the top output of sub_ddmmss on
   limb 4 alone.  Assuming sub_ddmmss is a plain two-limb subtraction, each
   temporary is 0 or all ones, so its negation is the 0/1 borrow.  The final
   sub_ddmmss takes the low borrow out of s4 and the limb-4 borrow (plus any
   new one) out of a5 - b5.  A borrow out of s5 is not returned.

   s4..s0 are stored before a5 and b5 are read, so an output must not alias
   a higher-positioned input.

   The temporaries must not be spelled like the ones inside
   sub_dddddmmmmmsssss: that macro declares its own locals in a nested block,
   and an argument with the same name would bind to the inner variable,
   leaving the one declared here uninitialised when it is read below. */
#define sub_ddddddmmmmmmssssss(s5, s4, s3, s2, s1, s0, a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __sub6_t, __sub6_u; \
    sub_dddddmmmmmsssss(__sub6_t, s3, s2, s1, s0, (mp_limb_t) 0, a3, a2, a1, a0, (mp_limb_t) 0, b3, b2, b1, b0); \
    sub_ddmmss(__sub6_u, s4, (mp_limb_t) 0, a4, (mp_limb_t) 0, b4); \
    sub_ddmmss(s5, s4, (a5) - (b5), s4, -__sub6_u, -__sub6_t); \
  } while (0)

/* Seven-limb subtraction: (s6,...,s0) = (a6,...,a0) - (b6,...,b0), with
   limbs listed most significant first.

   __sub7_t receives the top output of sub_ddddddmmmmmmssssss on the five low
   limbs with zero top operands, and __sub7_u the top output of sub_ddmmss on
   limb 5 alone.  Assuming sub_ddmmss is a plain two-limb subtraction, each
   temporary is 0 or all ones, so its negation is the 0/1 borrow.  The final
   sub_ddmmss takes the low borrow out of s5 and the limb-5 borrow (plus any
   new one) out of a6 - b6.  A borrow out of s6 is not returned.

   s5..s0 are stored before a6 and b6 are read, so an output must not alias
   a higher-positioned input.

   The temporaries must not be spelled like the ones inside
   sub_ddddddmmmmmmssssss: that macro declares its own locals in a nested
   block, and an argument with the same name would bind to the inner
   variable, leaving the one declared here uninitialised when it is read
   below. */
#define sub_dddddddmmmmmmmsssssss(s6, s5, s4, s3, s2, s1, s0, a6, a5, a4, a3, a2, a1, a0, b6, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __sub7_t, __sub7_u; \
    sub_ddddddmmmmmmssssss(__sub7_t, s4, s3, s2, s1, s0, (mp_limb_t) 0, a4, a3, a2, a1, a0, (mp_limb_t) 0, b4, b3, b2, b1, b0); \
    sub_ddmmss(__sub7_u, s5, (mp_limb_t) 0, a5, (mp_limb_t) 0, b5); \
    sub_ddmmss(s6, s5, (a6) - (b6), s5, -__sub7_u, -__sub7_t); \
  } while (0)

/* Eight-limb subtraction: (s7,...,s0) = (a7,...,a0) - (b7,...,b0), with
   limbs listed most significant first.

   __sub8_t receives the top output of sub_dddddddmmmmmmmsssssss on the six
   low limbs with zero top operands, and __sub8_u the top output of
   sub_ddmmss on limb 6 alone.  Assuming sub_ddmmss is a plain two-limb
   subtraction, each temporary is 0 or all ones, so its negation is the 0/1
   borrow.  The final sub_ddmmss takes the low borrow out of s6 and the
   limb-6 borrow (plus any new one) out of a7 - b7.  A borrow out of s7 is
   not returned.

   s6..s0 are stored before a7 and b7 are read, so an output must not alias
   a higher-positioned input.

   The temporaries must not be spelled like the ones inside
   sub_dddddddmmmmmmmsssssss: that macro declares its own locals in a nested
   block, and an argument with the same name would bind to the inner
   variable, leaving the one declared here uninitialised when it is read
   below. */
#define sub_ddddddddmmmmmmmmssssssss(s7, s6, s5, s4, s3, s2, s1, s0, a7, a6, a5, a4, a3, a2, a1, a0, b7, b6, b5, b4, b3, b2, b1, b0) \
  do { \
    mp_limb_t __sub8_t, __sub8_u; \
    sub_dddddddmmmmmmmsssssss(__sub8_t, s5, s4, s3, s2, s1, s0, (mp_limb_t) 0, a5, a4, a3, a2, a1, a0, (mp_limb_t) 0, b5, b4, b3, b2, b1, b0); \
    sub_ddmmss(__sub8_u, s6, (mp_limb_t) 0, a6, (mp_limb_t) 0, b6); \
    sub_ddmmss(s7, s6, (a7) - (b7), s6, -__sub8_u, -__sub8_t); \
  } while (0)

#else
# error crt_helpers.h requires AVX2 or Neon instructions
#endif
Expand Down

0 comments on commit 2788b2a

Please sign in to comment.