diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index d3362c4645..3b4777728d 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -62,9 +62,9 @@ Types, macros and constants NFLOAT_MAX_LIMBS The number of limbs `n` permitted as precision. The current - limits are are `1 \le n \le 33` on a 64-bit machine and - `1 \le n \le 66` on a 32-bit machine, permitting precision - up to 2112 bits. The upper limit exists so that elements and + limits are are `1 \le n \le 66` on a 64-bit machine and + `1 \le n \le 132` on a 32-bit machine, permitting precision + up to 4224 bits. The upper limit exists so that elements and temporary buffers are safe to allocate on the stack and so that simple operations like swapping are not too expensive. @@ -84,6 +84,7 @@ Types, macros and constants nfloat512_struct nfloat1024_struct nfloat2048_struct + nfloat4096_struct nfloat64_t nfloat128_t nfloat192_t @@ -92,6 +93,7 @@ Types, macros and constants nfloat512_t nfloat1024_t nfloat2048_t + nfloat4096_t For convenience we define types of the correct structure size for some common levels of bit precision. An ``nfloatX_t`` is defined as @@ -254,6 +256,7 @@ These methods are interchangeable with their ``gr`` counterparts. int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_submul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_addmul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int nfloat_sqr(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) .. function:: int nfloat_mul_2exp_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) @@ -329,3 +332,69 @@ Internal functions int _nfloat_sub_4(nfloat_ptr res, nn_srcptr x, slong xexp, int xsgnbit, nn_srcptr y, slong delta, gr_ctx_t ctx) int _nfloat_add_n(nfloat_ptr res, nn_srcptr xd, slong xexp, int xsgnbit, nn_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) int _nfloat_sub_n(nfloat_ptr res, nn_srcptr xd, slong xexp, int xsgnbit, nn_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) + +Complex numbers +------------------------------------------------------------------------------- + +Complex floating-point numbers have the obvious representation as +real pairs. + +.. type:: nfloat_complex_ptr + nfloat_complex_srcptr + +.. function:: int nfloat_complex_ctx_init(gr_ctx_t ctx, slong prec, int flags) + +.. macro:: NFLOAT_COMPLEX_CTX_DATA_NLIMBS(ctx) + NFLOAT_COMPLEX_RE(ptr, ctx) + NFLOAT_COMPLEX_IM(ptr, ctx) + NFLOAT_COMPLEX_IS_SPECIAL(x, ctx) + NFLOAT_COMPLEX_IS_ZERO(x, ctx) + +.. function:: void nfloat_complex_init(nfloat_complex_ptr res, gr_ctx_t ctx) + void nfloat_complex_clear(nfloat_complex_ptr res, gr_ctx_t ctx) + int nfloat_complex_zero(nfloat_complex_ptr res, gr_ctx_t ctx) + int nfloat_complex_get_acf(acf_t res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_set_acf(nfloat_complex_ptr res, const acf_t x, gr_ctx_t ctx) + int nfloat_complex_get_acb(acb_t res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_set_acb(nfloat_complex_ptr res, const acb_t x, gr_ctx_t ctx) + int nfloat_complex_write(gr_stream_t out, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_randtest(nfloat_complex_ptr res, flint_rand_t state, gr_ctx_t ctx) + void nfloat_complex_swap(nfloat_complex_ptr x, nfloat_complex_ptr y, gr_ctx_t ctx) + int nfloat_complex_set(nfloat_complex_ptr res, nfloat_complex_ptr x, gr_ctx_t ctx) + int nfloat_complex_one(nfloat_complex_ptr res, gr_ctx_t ctx) + int nfloat_complex_neg_one(nfloat_complex_ptr res, gr_ctx_t ctx) + truth_t nfloat_complex_is_zero(nfloat_complex_srcptr x, gr_ctx_t ctx) + truth_t nfloat_complex_is_one(nfloat_complex_srcptr x, gr_ctx_t ctx) + truth_t nfloat_complex_is_neg_one(nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_i(nfloat_complex_ptr res, gr_ctx_t ctx) + int nfloat_complex_pi(nfloat_complex_ptr res, gr_ctx_t ctx) + int nfloat_complex_conj(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_re(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_im(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + truth_t nfloat_complex_equal(nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) + int nfloat_complex_set_si(nfloat_complex_ptr res, slong x, gr_ctx_t ctx) + int nfloat_complex_set_ui(nfloat_complex_ptr res, ulong x, gr_ctx_t ctx) + int nfloat_complex_set_fmpz(nfloat_complex_ptr res, const fmpz_t x, gr_ctx_t ctx) + int nfloat_complex_set_fmpq(nfloat_complex_ptr res, const fmpq_t x, gr_ctx_t ctx) + int nfloat_complex_set_d(nfloat_complex_ptr res, double x, gr_ctx_t ctx) + int nfloat_complex_neg(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) + int nfloat_complex_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) + int _nfloat_complex_sqr_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) + int _nfloat_complex_sqr_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) + int _nfloat_complex_sqr_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) + int _nfloat_complex_sqr(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) + int nfloat_complex_sqr(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int _nfloat_complex_mul_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) + int _nfloat_complex_mul_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) + int _nfloat_complex_mul_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) + int nfloat_complex_mul(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) + int nfloat_complex_inv(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) + int nfloat_complex_div(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) + void _nfloat_complex_vec_init(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) + void _nfloat_complex_vec_clear(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) + int _nfloat_complex_vec_zero(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) + int _nfloat_complex_vec_set(nfloat_complex_ptr res, nfloat_complex_srcptr x, slong len, gr_ctx_t ctx) + int _nfloat_complex_vec_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx) + int _nfloat_complex_vec_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx) + diff --git a/src/gr.h b/src/gr.h index 7281062d1b..274d2b6117 100644 --- a/src/gr.h +++ b/src/gr.h @@ -681,7 +681,7 @@ typedef enum GR_CTX_COMPLEX_EXTENDED_CA, GR_CTX_RR_ARB, GR_CTX_CC_ACB, GR_CTX_REAL_FLOAT_ARF, GR_CTX_COMPLEX_FLOAT_ACF, - GR_CTX_NFLOAT, + GR_CTX_NFLOAT, GR_CTX_NFLOAT_COMPLEX, GR_CTX_FMPZ_POLY, GR_CTX_FMPQ_POLY, GR_CTX_GR_POLY, GR_CTX_FMPZ_MPOLY, GR_CTX_GR_MPOLY, GR_CTX_FMPZ_MPOLY_Q, diff --git a/src/gr/acb.c b/src/gr/acb.c index 3a8533b721..3f7cb4e08a 100644 --- a/src/gr/acb.c +++ b/src/gr/acb.c @@ -27,6 +27,7 @@ #include "gr_generic.h" #include "gr_vec.h" #include "gr_poly.h" +#include "nfloat.h" typedef struct { @@ -281,6 +282,18 @@ _gr_acb_set_other(acb_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) return GR_DOMAIN; } + case GR_CTX_NFLOAT_COMPLEX: + if (NFLOAT_CTX_HAS_INF_NAN(x_ctx)) /* todo */ + { + return GR_UNABLE; + } + else + { + nfloat_complex_get_acb(res, x, x_ctx); + acb_set_round(res, res, ACB_CTX_PREC(ctx)); + return GR_SUCCESS; + } + case GR_CTX_RR_ARB: arb_set_round(acb_realref(res), x, ACB_CTX_PREC(ctx)); arb_zero(acb_imagref(res)); @@ -958,6 +971,39 @@ _gr_acb_arg(acb_t res, const acb_t x, const gr_ctx_t ctx) return GR_SUCCESS; } +int +_gr_acb_cmp(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx) +{ + if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y)) && + ((arb_is_exact(acb_realref(x)) && arb_is_exact(acb_realref(y))) || !arb_overlaps(acb_realref(x), acb_realref(y)))) + { + *res = arf_cmp(arb_midref(acb_realref(x)), arb_midref(acb_realref(y))); + return GR_SUCCESS; + } + else + { + *res = 0; + return GR_UNABLE; + } +} + +int +_gr_acb_cmpabs(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx) +{ + acb_t t, u; + + *t = *x; + *u = *y; + + if (arf_sgn(arb_midref(acb_realref(t))) < 0) + ARF_NEG(arb_midref(acb_realref(t))); + + if (arf_sgn(arb_midref(acb_realref(u))) < 0) + ARF_NEG(arb_midref(acb_realref(u))); + + return _gr_acb_cmp(res, t, u, ctx); +} + int _gr_acb_pi(acb_t res, const gr_ctx_t ctx) { @@ -2172,6 +2218,8 @@ gr_method_tab_input _acb_methods_input[] = {GR_METHOD_SGN, (gr_funcptr) _gr_acb_sgn}, {GR_METHOD_CSGN, (gr_funcptr) _gr_acb_csgn}, {GR_METHOD_ARG, (gr_funcptr) _gr_acb_arg}, + {GR_METHOD_CMP, (gr_funcptr) _gr_acb_cmp}, + {GR_METHOD_CMPABS, (gr_funcptr) _gr_acb_cmpabs}, {GR_METHOD_PI, (gr_funcptr) _gr_acb_pi}, {GR_METHOD_EXP, (gr_funcptr) _gr_acb_exp}, {GR_METHOD_EXPM1, (gr_funcptr) _gr_acb_expm1}, diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c index ae5d3d43eb..7e7f5a4442 100644 --- a/src/gr/test_ring.c +++ b/src/gr/test_ring.c @@ -1369,12 +1369,16 @@ gr_test_zero_one(gr_ctx_t R, flint_rand_t state, int test_flags) } status |= gr_randtest(a, state, R); - status |= gr_one(a, R); - status |= gr_neg(a, a, R); + status |= gr_neg_one(a, R); equal = gr_is_neg_one(a, R); if (status == GR_SUCCESS && equal == T_FALSE) status = GR_TEST_FAIL; + status |= gr_neg(a, a, R); + equal = gr_is_one(a, R); + if (status == GR_SUCCESS && equal == T_FALSE) + status = GR_TEST_FAIL; + if ((test_flags & GR_TEST_ALWAYS_ABLE) && (status & GR_UNABLE)) status = GR_TEST_FAIL; @@ -3939,7 +3943,8 @@ gr_test_floating_point(gr_ctx_t R, slong iters, int test_flags) gr_test_iter(R, state, "add: aliasing", gr_test_add_aliasing, iters, test_flags); gr_test_iter(R, state, "sub: equal neg add", gr_test_sub_equal_neg_add, iters, test_flags); gr_test_iter(R, state, "sub: aliasing", gr_test_sub_aliasing, iters, test_flags); - gr_test_iter(R, state, "mul: commutative", gr_test_mul_commutative, iters, test_flags); + /* can fail for complex */ + /* gr_test_iter(R, state, "mul: commutative", gr_test_mul_commutative, iters, test_flags); */ gr_test_iter(R, state, "mul: aliasing", gr_test_mul_aliasing, iters, test_flags); gr_test_iter(R, state, "div: aliasing", gr_test_div_aliasing, iters, test_flags); gr_test_iter(R, state, "pow: aliasing", gr_test_pow_aliasing, iters, test_flags); diff --git a/src/mpn_extras.h b/src/mpn_extras.h index 365776e11f..fd9b346df9 100644 --- a/src/mpn_extras.h +++ b/src/mpn_extras.h @@ -264,6 +264,20 @@ char * _flint_mpn_get_str(mp_srcptr x, mp_size_t n); (r0) = __r0; (r1) = __r1; (r2) = __r2; \ } while (0) +#define FLINT_MPN_SQR_2X2(r3, r2, r1, r0, a1, a0) \ + do { \ + mp_limb_t __t1, __t2, __t3; \ + mp_limb_t __r3, __r2, __r1, __r0; \ + mp_limb_t __a1 = (a1), __a0 = (a0); \ + umul_ppmm(__t2, __t1, __a0, __a1); \ + add_sssaaaaaa(__t3, __t2, __t1, 0, __t2, __t1, 0, __t2, __t1); \ + umul_ppmm(__r1, __r0, __a0, __a0); \ + umul_ppmm(__r3, __r2, __a1, __a1); \ + add_sssaaaaaa(__r3, __r2, __r1, __r3, __r2, __r1, __t3, __t2, __t1); \ + (r0) = __r0; (r1) = __r1; (r2) = __r2; (r3) = __r3; \ + } while (0) + + /* {s0,s1,s2} = u[0]v[n-1] + u[1]v[n-2] + ... */ /* Assumes n >= 2 */ #define NN_DOTREV_S3_1X1(s2, s1, s0, u, v, n) \ diff --git a/src/nfloat.h b/src/nfloat.h index 442d0e81a1..199402794c 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -30,10 +30,10 @@ extern "C" { expensive compared to a pointer-and-size representation. */ #if FLINT_BITS == 64 #define NFLOAT_MIN_LIMBS 1 -#define NFLOAT_MAX_LIMBS 33 +#define NFLOAT_MAX_LIMBS 66 #else #define NFLOAT_MIN_LIMBS 1 -#define NFLOAT_MAX_LIMBS 66 +#define NFLOAT_MAX_LIMBS 132 #endif /* Number of header limbs used to encode sign + exponent. We use a @@ -101,6 +101,7 @@ typedef struct { ulong head[NFLOAT_HEADER_LIMBS]; ulong d[384 / FLINT_BITS]; } n typedef struct { ulong head[NFLOAT_HEADER_LIMBS]; ulong d[512 / FLINT_BITS]; } nfloat512_struct; typedef struct { ulong head[NFLOAT_HEADER_LIMBS]; ulong d[1024 / FLINT_BITS]; } nfloat1024_struct; typedef struct { ulong head[NFLOAT_HEADER_LIMBS]; ulong d[2048 / FLINT_BITS]; } nfloat2048_struct; +typedef struct { ulong head[NFLOAT_HEADER_LIMBS]; ulong d[4096 / FLINT_BITS]; } nfloat4096_struct; typedef nfloat64_struct nfloat64_t[1]; typedef nfloat128_struct nfloat128_t[1]; @@ -110,6 +111,7 @@ typedef nfloat384_struct nfloat384_t[1]; typedef nfloat512_struct nfloat512_t[1]; typedef nfloat1024_struct nfloat1024_t[1]; typedef nfloat2048_struct nfloat2048_t[1]; +typedef nfloat4096_struct nfloat4096_t[1]; #define LIMB_MSB_IS_SET(n) ((slong) (n) < 0) @@ -126,6 +128,7 @@ nfloat_init(nfloat_ptr res, gr_ctx_t ctx) NFLOAT_INLINE void nfloat_clear(nfloat_ptr res, gr_ctx_t ctx) { + FLINT_ASSERT(NFLOAT_IS_SPECIAL(res) || LIMB_MSB_IS_SET(NFLOAT_D(res)[NFLOAT_CTX_NLIMBS(ctx)-1])); } void nfloat_swap(nfloat_ptr x, nfloat_ptr y, gr_ctx_t ctx); @@ -262,6 +265,106 @@ nfloat_set_mpn_2exp(nfloat_ptr res, nn_srcptr x, slong xn, slong exp, int xsgnbi return _nfloat_set_mpn_2exp(res, x, xn, exp, xsgnbit, ctx); } +NFLOAT_INLINE int +nfloat_1_set_2_2exp(nfloat_ptr res, ulong x1, ulong x0, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + slong norm; + + if (x1 == 0) + { + if (x0 == 0) + return nfloat_zero(res, ctx); + + norm = flint_clz(x0); + NFLOAT_EXP(res) = exp - FLINT_BITS - norm; + NFLOAT_SGNBIT(res) = xsgnbit; + NFLOAT_D(res)[0] = x0 << norm; + } + else if (LIMB_MSB_IS_SET(x1)) + { + NFLOAT_EXP(res) = exp; + NFLOAT_SGNBIT(res) = xsgnbit; + NFLOAT_D(res)[0] = x1; + } + else + { + norm = flint_clz(x1); + NFLOAT_EXP(res) = exp - norm; + NFLOAT_SGNBIT(res) = xsgnbit; + NFLOAT_D(res)[0] = (x1 << norm) | (x0 >> (FLINT_BITS - norm)); + } + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +NFLOAT_INLINE int +nfloat_1_set_3_2exp(nfloat_ptr res, ulong x2, ulong x1, ulong x0, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + if (x2 == 0) + return nfloat_1_set_2_2exp(res, x1, x0, exp - FLINT_BITS, xsgnbit, ctx); + else + return nfloat_1_set_2_2exp(res, x2, x1, exp, xsgnbit, ctx); +} + +NFLOAT_INLINE int +nfloat_2_set_3_2exp(nfloat_ptr res, ulong x2, ulong x1, ulong x0, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + slong norm; + + if (x2 == 0) + { + if (x1 == 0) + { + if (x0 == 0) + return nfloat_zero(res, ctx); + + norm = flint_clz(x0); + exp = exp - 2 * FLINT_BITS - norm; + x1 = 0; + x2 = x0 << norm; + } + else if (LIMB_MSB_IS_SET(x1)) + { + exp = exp - FLINT_BITS; + x2 = x1; + x1 = x0; + } + else + { + norm = flint_clz(x1); + exp = exp - FLINT_BITS - norm; + x2 = (x1 << norm) | (x0 >> (FLINT_BITS - norm)); + x1 = (x0 << norm); + } + } + else if (!LIMB_MSB_IS_SET(x2)) + { + norm = flint_clz(x2); + exp = exp - norm; + x2 = (x2 << norm) | (x1 >> (FLINT_BITS - norm)); + x1 = (x1 << norm) | (x0 >> (FLINT_BITS - norm)); + } + + NFLOAT_EXP(res) = exp; + NFLOAT_SGNBIT(res) = xsgnbit; + NFLOAT_D(res)[0] = x1; + NFLOAT_D(res)[1] = x2; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +NFLOAT_INLINE int +nfloat_2_set_4_2exp(nfloat_ptr res, ulong x3, ulong x2, ulong x1, ulong x0, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + if (x3 == 0) + return nfloat_2_set_3_2exp(res, x2, x1, x0, exp - FLINT_BITS, xsgnbit, ctx); + else + return nfloat_2_set_3_2exp(res, x3, x2, x1, exp, xsgnbit, ctx); +} + + + int nfloat_set_fmpz(nfloat_ptr res, const fmpz_t x, gr_ctx_t ctx); #ifdef ARF_H @@ -302,6 +405,7 @@ int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_addmul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_submul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_sqr(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); int nfloat_mul_2exp_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx); @@ -349,6 +453,103 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); +/* Complex numbers */ +/* Note: we use the same context data for real and complex rings + (only which_ring and sizeof_elem differ). This allows us to call + nfloat methods on the real and imaginary parts without creating + a temporary nfloat context object, as long as the nfloat methods + don't call any generic gr methods internally. +*/ + +typedef nfloat_ptr nfloat_complex_ptr; +typedef nfloat_srcptr nfloat_complex_srcptr; + +int nfloat_complex_ctx_init(gr_ctx_t ctx, slong prec, int flags); + +#define NFLOAT_COMPLEX_CTX_DATA_NLIMBS(ctx) (2 * NFLOAT_CTX_DATA_NLIMBS(ctx)) + +#define NFLOAT_COMPLEX_RE(ptr, ctx) (ptr) +#define NFLOAT_COMPLEX_IM(ptr, ctx) ((nn_ptr) (ptr) + NFLOAT_CTX_DATA_NLIMBS(ctx)) + +#define NFLOAT_COMPLEX_IS_SPECIAL(x, ctx) (NFLOAT_IS_SPECIAL(NFLOAT_COMPLEX_RE(x, ctx)) || NFLOAT_IS_SPECIAL(NFLOAT_COMPLEX_IM(x, ctx))) +#define NFLOAT_COMPLEX_IS_ZERO(x, ctx) (NFLOAT_IS_ZERO(NFLOAT_COMPLEX_RE(x, ctx)) && NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(x, ctx))) + + +NFLOAT_INLINE void +nfloat_complex_init(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_init(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_init(NFLOAT_COMPLEX_IM(res, ctx), ctx); +} + +NFLOAT_INLINE void +nfloat_complex_clear(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + FLINT_ASSERT(NFLOAT_IS_SPECIAL(NFLOAT_COMPLEX_RE(res, ctx)) || LIMB_MSB_IS_SET(NFLOAT_D(NFLOAT_COMPLEX_RE(res, ctx))[NFLOAT_CTX_NLIMBS(ctx)-1])); + FLINT_ASSERT(NFLOAT_IS_SPECIAL(NFLOAT_COMPLEX_IM(res, ctx)) || LIMB_MSB_IS_SET(NFLOAT_D(NFLOAT_COMPLEX_IM(res, ctx))[NFLOAT_CTX_NLIMBS(ctx)-1])); +} + +NFLOAT_INLINE int +nfloat_complex_zero(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +#ifdef ACF_H +int nfloat_complex_get_acf(acf_t res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_set_acf(nfloat_complex_ptr res, const acf_t x, gr_ctx_t ctx); +#endif + +#ifdef ACB_H +int nfloat_complex_get_acb(acb_t res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_set_acb(nfloat_complex_ptr res, const acb_t x, gr_ctx_t ctx); +#endif + +int nfloat_complex_write(gr_stream_t out, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_randtest(nfloat_complex_ptr res, flint_rand_t state, gr_ctx_t ctx); + +void nfloat_complex_swap(nfloat_complex_ptr x, nfloat_complex_ptr y, gr_ctx_t ctx); +int nfloat_complex_set(nfloat_complex_ptr res, nfloat_complex_ptr x, gr_ctx_t ctx); +int nfloat_complex_one(nfloat_complex_ptr res, gr_ctx_t ctx); +int nfloat_complex_neg_one(nfloat_complex_ptr res, gr_ctx_t ctx); +truth_t nfloat_complex_is_zero(nfloat_complex_srcptr x, gr_ctx_t ctx); +truth_t nfloat_complex_is_one(nfloat_complex_srcptr x, gr_ctx_t ctx); +truth_t nfloat_complex_is_neg_one(nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_i(nfloat_complex_ptr res, gr_ctx_t ctx); +int nfloat_complex_pi(nfloat_complex_ptr res, gr_ctx_t ctx); +int nfloat_complex_conj(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_re(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_im(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +truth_t nfloat_complex_equal(nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx); +int nfloat_complex_set_si(nfloat_complex_ptr res, slong x, gr_ctx_t ctx); +int nfloat_complex_set_ui(nfloat_complex_ptr res, ulong x, gr_ctx_t ctx); +int nfloat_complex_set_fmpz(nfloat_complex_ptr res, const fmpz_t x, gr_ctx_t ctx); +int nfloat_complex_set_fmpq(nfloat_complex_ptr res, const fmpq_t x, gr_ctx_t ctx); +int nfloat_complex_set_d(nfloat_complex_ptr res, double x, gr_ctx_t ctx); +int nfloat_complex_neg(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx); +int nfloat_complex_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx); +int _nfloat_complex_sqr_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx); +int _nfloat_complex_sqr_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx); +int _nfloat_complex_sqr_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx); +int _nfloat_complex_sqr(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx); +int nfloat_complex_sqr(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int _nfloat_complex_mul_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx); +int _nfloat_complex_mul_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx); +int _nfloat_complex_mul_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx); +int nfloat_complex_mul(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx); +int nfloat_complex_inv(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx); +int nfloat_complex_div(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx); + +void _nfloat_complex_vec_init(nfloat_complex_ptr res, slong len, gr_ctx_t ctx); +void _nfloat_complex_vec_clear(nfloat_complex_ptr res, slong len, gr_ctx_t ctx); +int _nfloat_complex_vec_zero(nfloat_complex_ptr res, slong len, gr_ctx_t ctx); +int _nfloat_complex_vec_set(nfloat_complex_ptr res, nfloat_complex_srcptr x, slong len, gr_ctx_t ctx); +int _nfloat_complex_vec_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx); +int _nfloat_complex_vec_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx); + #ifdef __cplusplus } #endif diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c new file mode 100644 index 0000000000..6ec103ad2b --- /dev/null +++ b/src/nfloat/complex.c @@ -0,0 +1,1708 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "mpn_extras.h" +#include "gr.h" +#include "gr_mat.h" +#include "gr_generic.h" +#include "acf.h" +#include "acb.h" +#include "nfloat.h" + +int +_flint_mpn_signed_add_n(nn_ptr res, nn_srcptr x, int xsgnbit, nn_srcptr y, int ysgnbit, mp_size_t n) +{ + if (xsgnbit == ysgnbit) + mpn_add_n(res, x, y, n); + else + { + if (mpn_cmp(x, y, n) >= 0) + mpn_sub_n(res, x, y, n); + else + { + mpn_sub_n(res, y, x, n); + xsgnbit = !xsgnbit; + } + } + + return xsgnbit; +} + +/* todo: define in longlong.h */ +#if FLINT_BITS == 64 && defined(__GNUC__) && defined(__AVX2__) + +#define add_sssssaaaaaaaaaa(s4,s3,s2,s1,s0, a4,a3,a2,a1,a0, b4,b3,b2,b1,b0) \ + __asm__ ("addq %14,%q4\n\tadcq %12,%q3\n\tadcq %10,%q2\n\tadcq %8,%q1\n\tadcq %6,%q0" \ + : "=r" (s4), "=&r" (s3), "=&r" (s2), "=&r" (s1), "=&r" (s0) \ + : "0" ((ulong)(a4)), "rme" ((ulong)(b4)), \ + "1" ((ulong)(a3)), "rme" ((ulong)(b3)), \ + "2" ((ulong)(a2)), "rme" ((ulong)(b2)), \ + "3" ((ulong)(a1)), "rme" ((ulong)(b1)), \ + "4" ((ulong)(a0)), "rme" ((ulong)(b0))) + +#define add_ssssssaaaaaaaaaaaa(s5,s4,s3,s2,s1,s0, a5,a4,a3,a2,a1,a0, b5,b4,b3,b2,b1,b0) \ + __asm__ ("addq %17,%q5\nadcq %15,%q4\n\tadcq %13,%q3\n\tadcq %11,%q2\n\tadcq %9,%q1\n\tadcq %7,%q0" \ + : "=r" (s5), "=&r" (s4), "=&r" (s3), "=&r" (s2), "=&r" (s1), "=&r" (s0) \ + : "0" ((ulong)(a5)), "rme" ((ulong)(b5)), \ + "1" ((ulong)(a4)), "rme" ((ulong)(b4)), \ + "2" ((ulong)(a3)), "rme" ((ulong)(b3)), \ + "3" ((ulong)(a2)), "rme" ((ulong)(b2)), \ + "4" ((ulong)(a1)), "rme" ((ulong)(b1)), \ + "5" ((ulong)(a0)), "rme" ((ulong)(b0))) + +#define sub_ddddmmmmssss(s3, s2, s1, s0, a3, a2, a1, a0, b3, b2, b1, b0) \ + __asm__ ("subq %11,%q3\n\tsbbq %9,%q2\n\tsbbq %7,%q1\n\tsbbq %5,%q0" \ + : "=r" (s3), "=&r" (s2), "=&r" (s1), "=&r" (s0) \ + : "0" ((ulong)(a3)), "rme" ((ulong)(b3)), \ + "1" ((ulong)(a2)), "rme" ((ulong)(b2)), \ + "2" ((ulong)(a1)), "rme" ((ulong)(b1)), \ + "3" ((ulong)(a0)), "rme" ((ulong)(b0))) + +#define sub_dddddmmmmmsssss(s4,s3,s2,s1,s0, a4,a3,a2,a1,a0, b4,b3,b2,b1,b0) \ + __asm__ ("subq %14,%q4\n\tsbbq %12,%q3\n\tsbbq %10,%q2\n\tsbbq %8,%q1\n\tsbbq %6,%q0" \ + : "=r" (s4), "=&r" (s3), "=&r" (s2), "=&r" (s1), "=&r" (s0) \ + : "0" ((ulong)(a4)), "rme" ((ulong)(b4)), \ + "1" ((ulong)(a3)), "rme" ((ulong)(b3)), \ + "2" ((ulong)(a2)), "rme" ((ulong)(b2)), \ + "3" ((ulong)(a1)), "rme" ((ulong)(b1)), \ + "4" ((ulong)(a0)), "rme" ((ulong)(b0))) +#else + +#define add_sssssaaaaaaaaaa(s4, s3, s2, s1, s0, a4, a3, a2, a1, a0, b4, b3, b2, b1, b0) \ + do { \ + ulong __t0 = 0; \ + add_ssssaaaaaaaa(__t0, s2, s1, s0, (ulong) 0, a2, a1, a0, (ulong) 0, b2, b1, b0); \ + add_ssaaaa(s4, s3, a4, a3, b4, b3); \ + add_ssaaaa(s4, s3, s4, s3, (ulong) 0, __t0); \ + } while (0) + +#define add_ssssssaaaaaaaaaaaa(s5, s4, s3, s2, s1, s0, a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0) \ + do { \ + ulong __t1 = 0; \ + add_sssssaaaaaaaaaa(__t1, s3, s2, s1, s0, (ulong) 0, a3, a2, a1, a0, (ulong) 0, b3, b2, b1, b0);\ + add_ssaaaa(s5, s4, a5, a4, b5, b4); \ + add_ssaaaa(s5, s4, s5, s4, (ulong) 0, __t1); \ + } while (0) + +#define sub_ddddmmmmssss(s3, s2, s1, s0, a3, a2, a1, a0, b3, b2, b1, b0) \ + do { \ + ulong __t1, __u1; \ + sub_dddmmmsss(__t1, s1, s0, (ulong) 0, a1, a0, (ulong) 0, b1, b0); \ + sub_ddmmss(__u1, s2, (ulong) 0, a2, (ulong) 0, b2); \ + sub_ddmmss(s3, s2, (a3) - (b3), s2, -__u1, -__t1); \ + } while (0) + +#define sub_dddddmmmmmsssss(s4, s3, s2, s1, s0, a4, a3, a2, a1, a0, b4, b3, b2, b1, b0) \ + do { \ + ulong __t2, __u2; \ + sub_ddddmmmmssss(__t2, s2, s1, s0, (ulong) 0, a2, a1, a0, (ulong) 0, b2, b1, b0); \ + sub_ddmmss(__u2, s3, (ulong) 0, a3, (ulong) 0, b3); \ + sub_ddmmss(s4, s3, (a4) - (b4), s3, -__u2, -__t2); \ + } while (0) + +#endif + + +int +nfloat_complex_get_acf(acf_t res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + int status; + status = nfloat_get_arf(acf_realref(res), NFLOAT_COMPLEX_RE(x, ctx), ctx); + status |= nfloat_get_arf(acf_imagref(res), NFLOAT_COMPLEX_IM(x, ctx), ctx); + return status; +} + +int +nfloat_complex_set_acf(nfloat_complex_ptr res, const acf_t x, gr_ctx_t ctx) +{ + int status; + status = nfloat_set_arf(NFLOAT_COMPLEX_RE(res, ctx), acf_realref(x), ctx); + status |= nfloat_set_arf(NFLOAT_COMPLEX_IM(res, ctx), acf_imagref(x), ctx); + return status; +} + +int +nfloat_complex_get_acb(acb_t res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + int status; + status = nfloat_get_arf(arb_midref(acb_realref(res)), NFLOAT_COMPLEX_RE(x, ctx), ctx); + mag_zero(arb_radref(acb_realref(res))); + status |= nfloat_get_arf(arb_midref(acb_imagref(res)), NFLOAT_COMPLEX_IM(x, ctx), ctx); + mag_zero(arb_radref(acb_imagref(res))); + return status; +} + +int +nfloat_complex_set_acb(nfloat_complex_ptr res, const acb_t x, gr_ctx_t ctx) +{ + int status; + status = nfloat_set_arf(NFLOAT_COMPLEX_RE(res, ctx), arb_midref(acb_realref(x)), ctx); + status |= nfloat_set_arf(NFLOAT_COMPLEX_IM(res, ctx), arb_midref(acb_imagref(x)), ctx); + return status; +} + +int +nfloat_complex_write(gr_stream_t out, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + gr_ctx_t acf_ctx; + acf_t t; + int status; + + gr_ctx_init_complex_float_acf(acf_ctx, NFLOAT_CTX_PREC(ctx)); + acf_init(t); + nfloat_get_arf(acf_realref(t), NFLOAT_COMPLEX_RE(x, ctx), ctx); + nfloat_get_arf(acf_imagref(t), NFLOAT_COMPLEX_IM(x, ctx), ctx); + status = gr_write(out, t, acf_ctx); + acf_clear(t); + return status; + gr_ctx_clear(acf_ctx); +} + +int +nfloat_complex_randtest(nfloat_complex_ptr res, flint_rand_t state, gr_ctx_t ctx) +{ + int status; + status = nfloat_randtest(NFLOAT_COMPLEX_RE(res, ctx), state, ctx); + status |= nfloat_randtest(NFLOAT_COMPLEX_IM(res, ctx), state, ctx); + return status; +} + +void +nfloat_complex_swap(nfloat_complex_ptr x, nfloat_complex_ptr y, gr_ctx_t ctx) +{ + slong i, n = NFLOAT_COMPLEX_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + FLINT_SWAP(ulong, NFLOAT_DATA(x)[i], NFLOAT_DATA(y)[i]); +} + +int +nfloat_complex_set(nfloat_complex_ptr res, nfloat_complex_ptr x, gr_ctx_t ctx) +{ + slong i, n = NFLOAT_COMPLEX_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + NFLOAT_DATA(res)[i] = NFLOAT_DATA(x)[i]; + + return GR_SUCCESS; +} + +int +nfloat_complex_one(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_one(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +int +nfloat_complex_neg_one(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_neg_one(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +truth_t +nfloat_complex_is_zero(nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + return truth_and(nfloat_is_zero(NFLOAT_COMPLEX_RE(x, ctx), ctx), + nfloat_is_zero(NFLOAT_COMPLEX_IM(x, ctx), ctx)); +} + +truth_t +nfloat_complex_is_one(nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + return truth_and(nfloat_is_one(NFLOAT_COMPLEX_RE(x, ctx), ctx), + nfloat_is_zero(NFLOAT_COMPLEX_IM(x, ctx), ctx)); +} + +truth_t +nfloat_complex_is_neg_one(nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + return truth_and(nfloat_is_neg_one(NFLOAT_COMPLEX_RE(x, ctx), ctx), + nfloat_is_zero(NFLOAT_COMPLEX_IM(x, ctx), ctx)); +} + +int +nfloat_complex_i(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_one(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +int +nfloat_complex_pi(nfloat_complex_ptr res, gr_ctx_t ctx) +{ + nfloat_pi(NFLOAT_COMPLEX_RE(res, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +/* todo: be smart when in-place */ +int +nfloat_complex_conj(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + nfloat_set(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_RE(x, ctx), ctx); + nfloat_neg(NFLOAT_COMPLEX_IM(res, ctx), NFLOAT_COMPLEX_IM(x, ctx), ctx); + return GR_SUCCESS; +} + +int +nfloat_complex_re(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + nfloat_set(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_RE(x, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +int +nfloat_complex_im(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + nfloat_set(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_IM(x, ctx), ctx); + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return GR_SUCCESS; +} + +truth_t +nfloat_complex_equal(nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) +{ + return truth_and(nfloat_equal(NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx), + nfloat_equal(NFLOAT_COMPLEX_IM(x, ctx), NFLOAT_COMPLEX_IM(y, ctx), ctx)); +} + +int +nfloat_complex_set_si(nfloat_complex_ptr res, slong x, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return nfloat_set_si(NFLOAT_COMPLEX_RE(res, ctx), x, ctx); +} + +int +nfloat_complex_set_ui(nfloat_complex_ptr res, ulong x, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return nfloat_set_ui(NFLOAT_COMPLEX_RE(res, ctx), x, ctx); +} + +int +nfloat_complex_set_fmpz(nfloat_complex_ptr res, const fmpz_t x, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return nfloat_set_fmpz(NFLOAT_COMPLEX_RE(res, ctx), x, ctx); +} + +int +nfloat_complex_set_fmpq(nfloat_complex_ptr res, const fmpq_t x, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return nfloat_set_fmpq(NFLOAT_COMPLEX_RE(res, ctx), x, ctx); +} + +int +nfloat_complex_set_d(nfloat_complex_ptr res, double x, gr_ctx_t ctx) +{ + nfloat_zero(NFLOAT_COMPLEX_IM(res, ctx), ctx); + return nfloat_set_d(NFLOAT_COMPLEX_RE(res, ctx), x, ctx); +} + +int +nfloat_complex_neg(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + nfloat_neg(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_RE(x, ctx), ctx); + nfloat_neg(NFLOAT_COMPLEX_IM(res, ctx), NFLOAT_COMPLEX_IM(x, ctx), ctx); + return GR_SUCCESS; +} + +int +nfloat_complex_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) +{ + int status = GR_SUCCESS; + status |= nfloat_add(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx); + status |= nfloat_add(NFLOAT_COMPLEX_IM(res, ctx), NFLOAT_COMPLEX_IM(x, ctx), NFLOAT_COMPLEX_IM(y, ctx), ctx); + return status; +} + +int +nfloat_complex_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) +{ + int status = GR_SUCCESS; + status |= nfloat_sub(NFLOAT_COMPLEX_RE(res, ctx), NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx); + status |= nfloat_sub(NFLOAT_COMPLEX_IM(res, ctx), NFLOAT_COMPLEX_IM(x, ctx), NFLOAT_COMPLEX_IM(y, ctx), ctx); + return status; +} + +static inline int +_flint_mpn_cmp_2(ulong a1, ulong a0, ulong b1, ulong b0) +{ + if (a1 != b1) return (a1 < b1) ? -1 : 1; + if (a0 != b0) return (a0 < b0) ? -1 : 1; + return 0; +} + +static inline int +_flint_mpn_cmp_3(ulong a2, ulong a1, ulong a0, ulong b2, ulong b1, ulong b0) +{ + if (a2 != b2) return (a2 < b2) ? -1 : 1; + if (a1 != b1) return (a1 < b1) ? -1 : 1; + if (a0 != b0) return (a0 < b0) ? -1 : 1; + return 0; +} + +int +_nfloat_complex_sqr_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) +{ + ulong a2[NFLOAT_MAX_ALLOC]; + ulong b2[NFLOAT_MAX_ALLOC]; + int status = GR_SUCCESS; + + status |= nfloat_mul(a2, a, a, ctx); + status |= nfloat_mul(b2, b, b, ctx); + status |= nfloat_mul(res2, a, b, ctx); + status |= nfloat_mul_2exp_si(res2, res2, 1, ctx); + status |= nfloat_sub(res1, a2, b2, ctx); + + return status; +} + +int +_nfloat_complex_sqr_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) +{ + int status = GR_SUCCESS; + ulong a2[NFLOAT_MAX_LIMBS + 1]; + ulong b2[NFLOAT_MAX_LIMBS + 1]; + ulong hi, lo; + int ssgnbit; + slong a2exp, b2exp, a2b2exp, delta; + slong n, norm; + + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(a)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(b)); + + n = NFLOAT_CTX_NLIMBS(ctx); + + a2exp = 2 * NFLOAT_EXP(a); + b2exp = 2 * NFLOAT_EXP(b); + + a2b2exp = FLINT_MAX(a2exp, b2exp); + delta = a2b2exp - FLINT_MIN(a2exp, b2exp); + + /* TODO: this case is rare, but we could optimize it too */ + if (delta >= FLINT_BITS) + return _nfloat_complex_sqr_naive(res1, res2, a, b, ctx); + + if (n == 1) + { + umul_ppmm(a2[1], a2[0], NFLOAT_D(a)[0], NFLOAT_D(a)[0]); + umul_ppmm(b2[1], b2[0], NFLOAT_D(b)[0], NFLOAT_D(b)[0]); + + if (a2exp == b2exp) + { + if (_flint_mpn_cmp_2(a2[1], a2[0], b2[1], b2[0]) >= 0) + { + sub_ddmmss(a2[1], a2[0], a2[1], a2[0], b2[1], b2[0]); + ssgnbit = 0; + } + else + { + sub_ddmmss(a2[1], a2[0], b2[1], b2[0], a2[1], a2[0]); + ssgnbit = 1; + } + } + else if (a2exp > b2exp) + { + b2[0] = (b2[0] >> delta) | (b2[1] << (FLINT_BITS - delta)); + b2[1] = (b2[1] >> delta); + sub_ddmmss(a2[1], a2[0], a2[1], a2[0], b2[1], b2[0]); + ssgnbit = 0; + } + else + { + a2[0] = (a2[0] >> delta) | (a2[1] << (FLINT_BITS - delta)); + a2[1] = (a2[1] >> delta); + sub_ddmmss(a2[1], a2[0], b2[1], b2[0], a2[1], a2[0]); + ssgnbit = 1; + } + + umul_ppmm(hi, lo, NFLOAT_D(a)[0], NFLOAT_D(b)[0]); + + if (LIMB_MSB_IS_SET(hi)) + { + NFLOAT_D(res2)[0] = hi; + NFLOAT_EXP(res2) = NFLOAT_EXP(a) + NFLOAT_EXP(b) + 1; + } + else + { + NFLOAT_D(res2)[0] = (hi << 1) | (lo >> (FLINT_BITS - 1)); + NFLOAT_EXP(res2) = NFLOAT_EXP(a) + NFLOAT_EXP(b); + } + + NFLOAT_SGNBIT(res2) = NFLOAT_SGNBIT(a) ^ NFLOAT_SGNBIT(b); + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res2, ctx); + + if (a2[1] == 0) + { + if (a2[0] == 0) + { + status |= nfloat_zero(res1, ctx); + return status; + } + else + { + norm = flint_clz(a2[0]); + a2[1] = a2[0] << norm; + a2b2exp -= FLINT_BITS + norm; + } + } + else + { + norm = flint_clz(a2[1]); + + if (norm != 0) + { + a2[1] = (a2[1] << norm) | (a2[0] >> (FLINT_BITS - norm)); + a2b2exp -= norm; + } + } + + NFLOAT_SGNBIT(res1) = ssgnbit; + NFLOAT_EXP(res1) = a2b2exp; + NFLOAT_D(res1)[0] = a2[1]; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res1, ctx); + return status; + } + + if (n == 2) + { + ulong FLINT_SET_BUT_UNUSED(uu); + + FLINT_MPN_SQR_2X2(a2[2], a2[1], a2[0], uu, NFLOAT_D(a)[1], NFLOAT_D(a)[0]); + FLINT_MPN_SQR_2X2(b2[2], b2[1], b2[0], uu, NFLOAT_D(b)[1], NFLOAT_D(b)[0]); + + if (a2exp == b2exp) + { + a2b2exp = a2exp; + + if (_flint_mpn_cmp_3(a2[2], a2[1], a2[0], b2[2], b2[1], b2[0]) >= 0) + { + sub_dddmmmsss(a2[2], a2[1], a2[0], a2[2], a2[1], a2[0], b2[2], b2[1], b2[0]); + ssgnbit = 0; + } + else + { + sub_dddmmmsss(a2[2], a2[1], a2[0], b2[2], b2[1], b2[0], a2[2], a2[1], a2[0]); + ssgnbit = 1; + } + } + else if (a2exp > b2exp) + { + b2[0] = (b2[0] >> delta) | (b2[1] << (FLINT_BITS - delta)); + b2[1] = (b2[1] >> delta) | (b2[2] << (FLINT_BITS - delta)); + b2[2] = (b2[2] >> delta); + + sub_dddmmmsss(a2[2], a2[1], a2[0], a2[2], a2[1], a2[0], b2[2], b2[1], b2[0]); + ssgnbit = 0; + } + else + { + a2[0] = (a2[0] >> delta) | (a2[1] << (FLINT_BITS - delta)); + a2[1] = (a2[1] >> delta) | (a2[2] << (FLINT_BITS - delta)); + a2[2] = (a2[2] >> delta); + + sub_dddmmmsss(a2[2], a2[1], a2[0], b2[2], b2[1], b2[0], a2[2], a2[1], a2[0]); + ssgnbit = 1; + } + + status |= nfloat_mul(res2, a, b, ctx); + if (NFLOAT_EXP(res2) >= NFLOAT_MIN_EXP && NFLOAT_EXP(res2) < NFLOAT_MAX_EXP) + NFLOAT_EXP(res2)++; + else + status |= nfloat_mul_2exp_si(res2, res2, 1, ctx); + + status |= nfloat_2_set_3_2exp(res1, a2[2], a2[1], a2[0], a2b2exp, ssgnbit, ctx); + return status; + } + else + { + a2[0] = flint_mpn_sqrhigh(a2 + 1, NFLOAT_D(a), n); + b2[0] = flint_mpn_sqrhigh(b2 + 1, NFLOAT_D(b), n); + + if (n == 3) + { + if (a2exp == b2exp) + { + ssgnbit = (mpn_cmp(a2, b2, n + 1) < 0); + } + else if (a2exp > b2exp) + { + b2[0] = (b2[0] >> delta) | (b2[1] << (FLINT_BITS - delta)); + b2[1] = (b2[1] >> delta) | (b2[2] << (FLINT_BITS - delta)); + b2[2] = (b2[2] >> delta) | (b2[3] << (FLINT_BITS - delta)); + b2[3] = (b2[3] >> delta); + ssgnbit = 0; + } + else + { + a2[0] = (a2[0] >> delta) | (a2[1] << (FLINT_BITS - delta)); + a2[1] = (a2[1] >> delta) | (a2[2] << (FLINT_BITS - delta)); + a2[2] = (a2[2] >> delta) | (a2[3] << (FLINT_BITS - delta)); + a2[3] = (a2[3] >> delta); + ssgnbit = 1; + } + + if (ssgnbit == 0) + sub_ddddmmmmssss(a2[3], a2[2], a2[1], a2[0], a2[3], a2[2], a2[1], a2[0], b2[3], b2[2], b2[1], b2[0]); + else + sub_ddddmmmmssss(a2[3], a2[2], a2[1], a2[0], b2[3], b2[2], b2[1], b2[0], a2[3], a2[2], a2[1], a2[0]); + } + else if (n == 4) + { + if (a2exp == b2exp) + { + ssgnbit = (mpn_cmp(a2, b2, n + 1) < 0); + } + else if (a2exp > b2exp) + { + b2[0] = (b2[0] >> delta) | (b2[1] << (FLINT_BITS - delta)); + b2[1] = (b2[1] >> delta) | (b2[2] << (FLINT_BITS - delta)); + b2[2] = (b2[2] >> delta) | (b2[3] << (FLINT_BITS - delta)); + b2[3] = (b2[3] >> delta) | (b2[4] << (FLINT_BITS - delta)); + b2[4] = (b2[4] >> delta); + ssgnbit = 0; + } + else + { + a2[0] = (a2[0] >> delta) | (a2[1] << (FLINT_BITS - delta)); + a2[1] = (a2[1] >> delta) | (a2[2] << (FLINT_BITS - delta)); + a2[2] = (a2[2] >> delta) | (a2[3] << (FLINT_BITS - delta)); + a2[3] = (a2[3] >> delta) | (a2[4] << (FLINT_BITS - delta)); + a2[4] = (a2[4] >> delta); + ssgnbit = 1; + } + + if (ssgnbit == 0) + sub_dddddmmmmmsssss(a2[4], a2[3], a2[2], a2[1], a2[0], a2[4], a2[3], a2[2], a2[1], a2[0], b2[4], b2[3], b2[2], b2[1], b2[0]); + else + sub_dddddmmmmmsssss(a2[4], a2[3], a2[2], a2[1], a2[0], b2[4], b2[3], b2[2], b2[1], b2[0], a2[4], a2[3], a2[2], a2[1], a2[0]); + } + else + { + if (a2exp == b2exp) + { + ssgnbit = flint_mpn_signed_sub_n(a2, a2, b2, n + 1); + } + else if (a2exp > b2exp) + { + mpn_rshift(b2, b2, n + 1, a2exp - b2exp); + mpn_sub_n(a2, a2, b2, n + 1); + ssgnbit = 0; + } + else + { + mpn_rshift(a2, a2, n + 1, b2exp - a2exp); + mpn_sub_n(a2, b2, a2, n + 1); + ssgnbit = 1; + } + } + } + + status |= nfloat_mul(res2, a, b, ctx); + if (NFLOAT_EXP(res2) >= NFLOAT_MIN_EXP && NFLOAT_EXP(res2) < NFLOAT_MAX_EXP) + NFLOAT_EXP(res2)++; + else + status |= nfloat_mul_2exp_si(res2, res2, 1, ctx); + + status |= nfloat_set_mpn_2exp(res1, a2, n + 1, a2b2exp, ssgnbit, ctx); + + return status; +} + +int +_nfloat_complex_sqr_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) +{ + slong aexp, bexp, abexp, adelta, bdelta; + int asgnbit, bsgnbit, ssgnbit, tsgnbit; + int status; + slong n; + ulong aa[NFLOAT_MAX_LIMBS + 1]; + ulong bb[NFLOAT_MAX_LIMBS + 1]; + ulong s[NFLOAT_MAX_LIMBS + 1]; + ulong t[NFLOAT_MAX_LIMBS + 1]; + ulong u[NFLOAT_MAX_LIMBS + 1]; + ulong v[NFLOAT_MAX_LIMBS + 1]; + + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(a)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(b)); + + n = NFLOAT_CTX_NLIMBS(ctx); + + aexp = NFLOAT_EXP(a); + bexp = NFLOAT_EXP(b); + + asgnbit = NFLOAT_SGNBIT(a); + bsgnbit = NFLOAT_SGNBIT(b); + + /* Two extra bits allows adding without overflow. */ + abexp = FLINT_MAX(aexp, bexp) + 2; + adelta = abexp - aexp; + bdelta = abexp - bexp; + + /* We use one guard limb. There is cancellation of about + max(adelta,bdelta) bits; abandon if we are running out + of guard bits. */ + if (adelta >= FLINT_BITS - 4 || bdelta >= FLINT_BITS - 4) + return _nfloat_complex_sqr_standard(res1, res2, a, b, ctx); + + aa[0] = mpn_rshift(aa + 1, NFLOAT_D(a), n, adelta); + bb[0] = mpn_rshift(bb + 1, NFLOAT_D(b), n, bdelta); + _flint_mpn_signed_add_n(v, aa, asgnbit, bb, bsgnbit, n + 1); + flint_mpn_sqrhigh(s, v, n + 1); + flint_mpn_sqrhigh(t, aa, n + 1); + flint_mpn_sqrhigh(u, bb, n + 1); + mpn_add_n(v, t, u, n + 1); + ssgnbit = flint_mpn_signed_sub_n(s, s, v, n + 1); + tsgnbit = flint_mpn_signed_sub_n(t, t, u, n + 1); + + status = GR_SUCCESS; + status |= nfloat_set_mpn_2exp(res1, t, n + 1, 2 * abexp, tsgnbit, ctx); + status |= nfloat_set_mpn_2exp(res2, s, n + 1, 2 * abexp, ssgnbit, ctx); + return status; +} + +int +_nfloat_complex_sqr(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, gr_ctx_t ctx) +{ + int status; + + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + return _nfloat_complex_sqr_naive(res1, res2, a, b, ctx); + + if (NFLOAT_IS_ZERO(b)) + { + status = nfloat_sqr(res1, a, ctx); + status |= nfloat_zero(res2, ctx); + return status; + } + + if (NFLOAT_IS_ZERO(a)) + { + status = nfloat_sqr(res1, b, ctx); + status |= nfloat_neg(res1, res1, ctx); + status |= nfloat_zero(res2, ctx); + return status; + } + + if (NFLOAT_CTX_NLIMBS(ctx) < 20) + return _nfloat_complex_sqr_standard(res1, res2, a, b, ctx); + else + return _nfloat_complex_sqr_karatsuba(res1, res2, a, b, ctx); +} + +int +nfloat_complex_sqr(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + return _nfloat_complex_sqr(NFLOAT_COMPLEX_RE(res, ctx), + NFLOAT_COMPLEX_IM(res, ctx), + NFLOAT_COMPLEX_RE(x, ctx), + NFLOAT_COMPLEX_IM(x, ctx), ctx); +} + +int +_nfloat_complex_mul_naive(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) +{ + ulong ac[NFLOAT_MAX_ALLOC]; + ulong bd[NFLOAT_MAX_ALLOC]; + ulong ad[NFLOAT_MAX_ALLOC]; + ulong bc[NFLOAT_MAX_ALLOC]; + int status = GR_SUCCESS; + + status |= nfloat_mul(ac, a, c, ctx); + status |= nfloat_mul(bd, b, d, ctx); + status |= nfloat_mul(ad, a, d, ctx); + status |= nfloat_mul(bc, b, c, ctx); + status |= nfloat_sub(res1, ac, bd, ctx); + status |= nfloat_add(res2, ad, bc, ctx); + + return status; +} + +int +_nfloat_complex_mul_standard(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) +{ + slong aexp, bexp, cexp, dexp, acexp, bdexp, adexp, bcexp; + slong sexp, texp, sdelta, tdelta; + int asgnbit, bsgnbit, csgnbit, dsgnbit, usgnbit, vsgnbit, ssgnbit, tsgnbit; + int status; + slong n; + ulong u[NFLOAT_MAX_LIMBS + 1]; + ulong v[NFLOAT_MAX_LIMBS + 1]; + ulong s[NFLOAT_MAX_LIMBS + 2]; + ulong t[NFLOAT_MAX_LIMBS + 2]; + + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(a)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(b)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(c)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(d)); + + n = NFLOAT_CTX_NLIMBS(ctx); + + aexp = NFLOAT_EXP(a); + bexp = NFLOAT_EXP(b); + cexp = NFLOAT_EXP(c); + dexp = NFLOAT_EXP(d); + + asgnbit = NFLOAT_SGNBIT(a); + bsgnbit = NFLOAT_SGNBIT(b); + csgnbit = NFLOAT_SGNBIT(c); + dsgnbit = NFLOAT_SGNBIT(d); + + /* ac - bd */ + acexp = aexp + cexp; + bdexp = bexp + dexp; + sexp = FLINT_MAX(acexp, bdexp); + sdelta = sexp - FLINT_MIN(acexp, bdexp); + + /* ad + bc */ + adexp = aexp + dexp; + bcexp = bexp + cexp; + texp = FLINT_MAX(adexp, bcexp); + tdelta = texp - FLINT_MIN(adexp, bcexp); + + /* todo */ + if (sdelta >= FLINT_BITS || tdelta >= FLINT_BITS) + return _nfloat_complex_mul_naive(res1, res2, a, b, c, d, ctx); + + if (n == 1) + { + umul_ppmm(u[1], u[0], NFLOAT_D(a)[0], NFLOAT_D(c)[0]); + umul_ppmm(v[1], v[0], NFLOAT_D(b)[0], NFLOAT_D(d)[0]); + usgnbit = asgnbit ^ csgnbit; + vsgnbit = !(bsgnbit ^ dsgnbit); + + if (sdelta != 0) + { + if (acexp > bdexp) + { + v[0] = (v[0] >> sdelta) | (v[1] << (FLINT_BITS - sdelta)); + v[1] >>= sdelta; + } + else + { + u[0] = (u[0] >> sdelta) | (u[1] << (FLINT_BITS - sdelta)); + u[1] >>= sdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_sssaaaaaa(s[2], s[1], s[0], 0, u[1], u[0], 0, v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + if (_flint_mpn_cmp_2(u[1], u[0], v[1], v[0]) >= 0) + { + sub_ddmmss(s[1], s[0], u[1], u[0], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + sub_ddmmss(s[1], s[0], v[1], v[0], u[1], u[0]); + ssgnbit = !usgnbit; + } + + s[2] = 0; + } + + umul_ppmm(u[1], u[0], NFLOAT_D(a)[0], NFLOAT_D(d)[0]); + umul_ppmm(v[1], v[0], NFLOAT_D(b)[0], NFLOAT_D(c)[0]); + usgnbit = asgnbit ^ dsgnbit; + vsgnbit = bsgnbit ^ csgnbit; + + if (tdelta != 0) + { + if (adexp > bcexp) + { + v[0] = (v[0] >> tdelta) | (v[1] << (FLINT_BITS - tdelta)); + v[1] >>= tdelta; + } + else + { + u[0] = (u[0] >> tdelta) | (u[1] << (FLINT_BITS - tdelta)); + u[1] >>= tdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_sssaaaaaa(t[2], t[1], t[0], 0, u[1], u[0], 0, v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + if (_flint_mpn_cmp_2(u[1], u[0], v[1], v[0]) >= 0) + { + sub_ddmmss(t[1], t[0], u[1], u[0], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + sub_ddmmss(t[1], t[0], v[1], v[0], u[1], u[0]); + tsgnbit = !usgnbit; + } + + t[2] = 0; + } + + status = GR_SUCCESS; + status |= nfloat_1_set_3_2exp(res1, s[2], s[1], s[0], sexp + FLINT_BITS, ssgnbit, ctx); + status |= nfloat_1_set_3_2exp(res2, t[2], t[1], t[0], texp + FLINT_BITS, tsgnbit, ctx); + return status; + } + else if (n == 2) + { + ulong FLINT_SET_BUT_UNUSED(uu); + + FLINT_MPN_MUL_2X2(u[2], u[1], u[0], uu, NFLOAT_D(a)[1], NFLOAT_D(a)[0], NFLOAT_D(c)[1], NFLOAT_D(c)[0]); + FLINT_MPN_MUL_2X2(v[2], v[1], v[0], uu, NFLOAT_D(b)[1], NFLOAT_D(b)[0], NFLOAT_D(d)[1], NFLOAT_D(d)[0]); + usgnbit = asgnbit ^ csgnbit; + vsgnbit = !(bsgnbit ^ dsgnbit); + + if (sdelta != 0) + { + if (acexp > bdexp) + { + v[0] = (v[0] >> sdelta) | (v[1] << (FLINT_BITS - sdelta)); + v[1] = (v[1] >> sdelta) | (v[2] << (FLINT_BITS - sdelta)); + v[2] >>= sdelta; + } + else + { + u[0] = (u[0] >> sdelta) | (u[1] << (FLINT_BITS - sdelta)); + u[1] = (u[1] >> sdelta) | (u[2] << (FLINT_BITS - sdelta)); + u[2] >>= sdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_ssssaaaaaaaa(s[3], s[2], s[1], s[0], 0, u[2], u[1], u[0], 0, v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + if (_flint_mpn_cmp_3(u[2], u[1], u[0], v[2], v[1], v[0]) >= 0) + { + sub_dddmmmsss(s[2], s[1], s[0], u[2], u[1], u[0], v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + sub_dddmmmsss(s[2], s[1], s[0], v[2], v[1], v[0], u[2], u[1], u[0]); + ssgnbit = !usgnbit; + } + + s[3] = 0; + } + + FLINT_MPN_MUL_2X2(u[2], u[1], u[0], uu, NFLOAT_D(a)[1], NFLOAT_D(a)[0], NFLOAT_D(d)[1], NFLOAT_D(d)[0]); + FLINT_MPN_MUL_2X2(v[2], v[1], v[0], uu, NFLOAT_D(b)[1], NFLOAT_D(b)[0], NFLOAT_D(c)[1], NFLOAT_D(c)[0]); + usgnbit = asgnbit ^ dsgnbit; + vsgnbit = bsgnbit ^ csgnbit; + + if (tdelta != 0) + { + if (adexp > bcexp) + { + v[0] = (v[0] >> tdelta) | (v[1] << (FLINT_BITS - tdelta)); + v[1] = (v[1] >> tdelta) | (v[2] << (FLINT_BITS - tdelta)); + v[2] >>= tdelta; + } + else + { + u[0] = (u[0] >> tdelta) | (u[1] << (FLINT_BITS - tdelta)); + u[1] = (u[1] >> tdelta) | (u[2] << (FLINT_BITS - tdelta)); + u[2] >>= tdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_ssssaaaaaaaa(t[3], t[2], t[1], t[0], 0, u[2], u[1], u[0], 0, v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + if (_flint_mpn_cmp_3(u[2], u[1], u[0], v[2], v[1], v[0]) >= 0) + { + sub_dddmmmsss(t[2], t[1], t[0], u[2], u[1], u[0], v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + sub_dddmmmsss(t[2], t[1], t[0], v[2], v[1], v[0], u[2], u[1], u[0]); + tsgnbit = !usgnbit; + } + + t[3] = 0; + } + + status = GR_SUCCESS; + status |= nfloat_2_set_4_2exp(res1, s[3], s[2], s[1], s[0], sexp + FLINT_BITS, ssgnbit, ctx); + status |= nfloat_2_set_4_2exp(res2, t[3], t[2], t[1], t[0], texp + FLINT_BITS, tsgnbit, ctx); + return status; + } + else if (n == 3) + { + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(c), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(d), n); + usgnbit = asgnbit ^ csgnbit; + vsgnbit = !(bsgnbit ^ dsgnbit); + + if (sdelta != 0) + { + if (acexp > bdexp) + { + v[0] = (v[0] >> sdelta) | (v[1] << (FLINT_BITS - sdelta)); + v[1] = (v[1] >> sdelta) | (v[2] << (FLINT_BITS - sdelta)); + v[2] = (v[2] >> sdelta) | (v[3] << (FLINT_BITS - sdelta)); + v[3] >>= sdelta; + } + else + { + u[0] = (u[0] >> sdelta) | (u[1] << (FLINT_BITS - sdelta)); + u[1] = (u[1] >> sdelta) | (u[2] << (FLINT_BITS - sdelta)); + u[2] = (u[2] >> sdelta) | (u[3] << (FLINT_BITS - sdelta)); + u[3] >>= sdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_sssssaaaaaaaaaa(s[4], s[3], s[2], s[1], s[0], 0, u[3], u[2], u[1], u[0], 0, v[3], v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + if (mpn_cmp(u, v, 4) >= 0) + { + sub_ddddmmmmssss(s[3], s[2], s[1], s[0], u[3], u[2], u[1], u[0], v[3], v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + sub_ddddmmmmssss(s[3], s[2], s[1], s[0], v[3], v[2], v[1], v[0], u[3], u[2], u[1], u[0]); + ssgnbit = !usgnbit; + } + + s[4] = 0; + } + + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(d), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(c), n); + usgnbit = asgnbit ^ dsgnbit; + vsgnbit = bsgnbit ^ csgnbit; + + if (tdelta != 0) + { + if (adexp > bcexp) + { + v[0] = (v[0] >> tdelta) | (v[1] << (FLINT_BITS - tdelta)); + v[1] = (v[1] >> tdelta) | (v[2] << (FLINT_BITS - tdelta)); + v[2] = (v[2] >> tdelta) | (v[3] << (FLINT_BITS - tdelta)); + v[3] >>= tdelta; + } + else + { + u[0] = (u[0] >> tdelta) | (u[1] << (FLINT_BITS - tdelta)); + u[1] = (u[1] >> tdelta) | (u[2] << (FLINT_BITS - tdelta)); + u[2] = (u[2] >> tdelta) | (u[3] << (FLINT_BITS - tdelta)); + u[3] >>= tdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_sssssaaaaaaaaaa(t[4], t[3], t[2], t[1], t[0], 0, u[3], u[2], u[1], u[0], 0, v[3], v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + if (mpn_cmp(u, v, 4) >= 0) + { + sub_ddddmmmmssss(t[3], t[2], t[1], t[0], u[3], u[2], u[1], u[0], v[3], v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + sub_ddddmmmmssss(t[3], t[2], t[1], t[0], v[3], v[2], v[1], v[0], u[3], u[2], u[1], u[0]); + tsgnbit = !usgnbit; + } + + t[4] = 0; + } + } + else if (n == 4) + { + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(c), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(d), n); + usgnbit = asgnbit ^ csgnbit; + vsgnbit = !(bsgnbit ^ dsgnbit); + + if (sdelta != 0) + { + if (acexp > bdexp) + { + v[0] = (v[0] >> sdelta) | (v[1] << (FLINT_BITS - sdelta)); + v[1] = (v[1] >> sdelta) | (v[2] << (FLINT_BITS - sdelta)); + v[2] = (v[2] >> sdelta) | (v[3] << (FLINT_BITS - sdelta)); + v[3] = (v[3] >> sdelta) | (v[4] << (FLINT_BITS - sdelta)); + v[4] >>= sdelta; + } + else + { + u[0] = (u[0] >> sdelta) | (u[1] << (FLINT_BITS - sdelta)); + u[1] = (u[1] >> sdelta) | (u[2] << (FLINT_BITS - sdelta)); + u[2] = (u[2] >> sdelta) | (u[3] << (FLINT_BITS - sdelta)); + u[3] = (u[3] >> sdelta) | (u[4] << (FLINT_BITS - sdelta)); + u[4] >>= sdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_ssssssaaaaaaaaaaaa(s[5], s[4], s[3], s[2], s[1], s[0], 0, u[4], u[3], u[2], u[1], u[0], 0, v[4], v[3], v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + if (mpn_cmp(u, v, 5) >= 0) + { + sub_dddddmmmmmsssss(s[4], s[3], s[2], s[1], s[0], u[4], u[3], u[2], u[1], u[0], v[4], v[3], v[2], v[1], v[0]); + ssgnbit = usgnbit; + } + else + { + sub_dddddmmmmmsssss(s[4], s[3], s[2], s[1], s[0], v[4], v[3], v[2], v[1], v[0], u[4], u[3], u[2], u[1], u[0]); + ssgnbit = !usgnbit; + } + + s[5] = 0; + } + + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(d), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(c), n); + usgnbit = asgnbit ^ dsgnbit; + vsgnbit = bsgnbit ^ csgnbit; + + if (tdelta != 0) + { + if (adexp > bcexp) + { + v[0] = (v[0] >> tdelta) | (v[1] << (FLINT_BITS - tdelta)); + v[1] = (v[1] >> tdelta) | (v[2] << (FLINT_BITS - tdelta)); + v[2] = (v[2] >> tdelta) | (v[3] << (FLINT_BITS - tdelta)); + v[3] = (v[3] >> tdelta) | (v[4] << (FLINT_BITS - tdelta)); + v[4] >>= tdelta; + } + else + { + u[0] = (u[0] >> tdelta) | (u[1] << (FLINT_BITS - tdelta)); + u[1] = (u[1] >> tdelta) | (u[2] << (FLINT_BITS - tdelta)); + u[2] = (u[2] >> tdelta) | (u[3] << (FLINT_BITS - tdelta)); + u[3] = (u[3] >> tdelta) | (u[4] << (FLINT_BITS - tdelta)); + u[4] >>= tdelta; + } + } + + if (usgnbit == vsgnbit) + { + add_ssssssaaaaaaaaaaaa(t[5], t[4], t[3], t[2], t[1], t[0], 0, u[4], u[3], u[2], u[1], u[0], 0, v[4], v[3], v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + if (mpn_cmp(u, v, 5) >= 0) + { + sub_dddddmmmmmsssss(t[4], t[3], t[2], t[1], t[0], u[4], u[3], u[2], u[1], u[0], v[4], v[3], v[2], v[1], v[0]); + tsgnbit = usgnbit; + } + else + { + sub_dddddmmmmmsssss(t[4], t[3], t[2], t[1], t[0], v[4], v[3], v[2], v[1], v[0], u[4], u[3], u[2], u[1], u[0]); + tsgnbit = !usgnbit; + } + + t[5] = 0; + } + } + else + { + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(c), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(d), n); + usgnbit = asgnbit ^ csgnbit; + vsgnbit = !(bsgnbit ^ dsgnbit); + + if (sdelta != 0) + { + if (acexp > bdexp) + mpn_rshift(v, v, n + 1, sdelta); + else + mpn_rshift(u, u, n + 1, sdelta); + } + + if (usgnbit == vsgnbit) + { + s[n + 1] = mpn_add_n(s, u, v, n + 1); + ssgnbit = usgnbit; + } + else + { + ssgnbit = usgnbit ^ flint_mpn_signed_sub_n(s, u, v, n + 1); + s[n + 1] = 0; + } + + u[0] = flint_mpn_mulhigh_n(u + 1, NFLOAT_D(a), NFLOAT_D(d), n); + v[0] = flint_mpn_mulhigh_n(v + 1, NFLOAT_D(b), NFLOAT_D(c), n); + usgnbit = asgnbit ^ dsgnbit; + vsgnbit = bsgnbit ^ csgnbit; + + if (tdelta != 0) + { + if (adexp > bcexp) + mpn_rshift(v, v, n + 1, tdelta); + else + mpn_rshift(u, u, n + 1, tdelta); + } + + if (usgnbit == vsgnbit) + { + t[n + 1] = mpn_add_n(t, u, v, n + 1); + tsgnbit = usgnbit; + } + else + { + tsgnbit = usgnbit ^ flint_mpn_signed_sub_n(t, u, v, n + 1); + t[n + 1] = 0; + } + } + + status = GR_SUCCESS; + status |= nfloat_set_mpn_2exp(res1, s, n + 2, sexp + FLINT_BITS, ssgnbit, ctx); + status |= nfloat_set_mpn_2exp(res2, t, n + 2, texp + FLINT_BITS, tsgnbit, ctx); + + return status; +} + +int +_nfloat_complex_mul_karatsuba(nfloat_ptr res1, nfloat_ptr res2, nfloat_srcptr a, nfloat_srcptr b, nfloat_srcptr c, nfloat_srcptr d, gr_ctx_t ctx) +{ + slong aexp, bexp, cexp, dexp, abexp, cdexp, adelta, bdelta, cdelta, ddelta; + int asgnbit, bsgnbit, csgnbit, dsgnbit, ssgnbit, tsgnbit, usgnbit; + int status; + slong n; + + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(a)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(b)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(c)); + FLINT_ASSERT(!NFLOAT_IS_SPECIAL(d)); + + n = NFLOAT_CTX_NLIMBS(ctx); + + aexp = NFLOAT_EXP(a); + bexp = NFLOAT_EXP(b); + cexp = NFLOAT_EXP(c); + dexp = NFLOAT_EXP(d); + + asgnbit = NFLOAT_SGNBIT(a); + bsgnbit = NFLOAT_SGNBIT(b); + csgnbit = NFLOAT_SGNBIT(c); + dsgnbit = NFLOAT_SGNBIT(d); + + abexp = FLINT_MAX(aexp, bexp) + 2; + cdexp = FLINT_MAX(cexp, dexp) + 2; + + adelta = abexp - aexp; + bdelta = abexp - bexp; + cdelta = cdexp - cexp; + ddelta = cdexp - dexp; + + if (adelta < FLINT_BITS && bdelta < FLINT_BITS && + cdelta < FLINT_BITS && ddelta < FLINT_BITS) + { + ulong aa[NFLOAT_MAX_LIMBS + 1]; + ulong bb[NFLOAT_MAX_LIMBS + 1]; + ulong cc[NFLOAT_MAX_LIMBS + 1]; + ulong dd[NFLOAT_MAX_LIMBS + 1]; + ulong s[NFLOAT_MAX_LIMBS + 1]; + ulong t[NFLOAT_MAX_LIMBS + 1]; + ulong u[NFLOAT_MAX_LIMBS + 1]; + ulong v[NFLOAT_MAX_LIMBS + 1]; + + /* + s = c * (a + b) + t = a * (d - c) + u = b * (c + d) + re = s - u + im = s + t + */ + + aa[0] = mpn_rshift(aa + 1, NFLOAT_D(a), n, adelta); + bb[0] = mpn_rshift(bb + 1, NFLOAT_D(b), n, bdelta); + cc[0] = mpn_rshift(cc + 1, NFLOAT_D(c), n, cdelta); + dd[0] = mpn_rshift(dd + 1, NFLOAT_D(d), n, ddelta); + + ssgnbit = csgnbit ^ _flint_mpn_signed_add_n(v, aa, asgnbit, bb, bsgnbit, n + 1); + flint_mpn_mulhigh_n(s, cc, v, n + 1); + + tsgnbit = asgnbit ^ _flint_mpn_signed_add_n(v, dd, dsgnbit, cc, !csgnbit, n + 1); + flint_mpn_mulhigh_n(t, aa, v, n + 1); + + usgnbit = bsgnbit ^ _flint_mpn_signed_add_n(v, cc, csgnbit, dd, dsgnbit, n + 1); + flint_mpn_mulhigh_n(u, bb, v, n + 1); + + usgnbit = _flint_mpn_signed_add_n(u, s, ssgnbit, u, !usgnbit, n + 1); + tsgnbit = _flint_mpn_signed_add_n(t, s, ssgnbit, t, tsgnbit, n + 1); + + status = GR_SUCCESS; + status |= nfloat_set_mpn_2exp(res1, u, n + 1, abexp + cdexp, usgnbit, ctx); + status |= nfloat_set_mpn_2exp(res2, t, n + 1, abexp + cdexp, tsgnbit, ctx); + return status; + } + + return _nfloat_complex_mul_naive(res1, res2, a, b, c, d, ctx); +} + +int +nfloat_complex_mul(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) +{ + nfloat_srcptr a, b, c, d; + nfloat_ptr r, s; + int status; + + if (x == y) + return nfloat_complex_sqr(res, x, ctx); + + r = NFLOAT_COMPLEX_RE(res, ctx); + s = NFLOAT_COMPLEX_IM(res, ctx); + + a = NFLOAT_COMPLEX_RE(x, ctx); + b = NFLOAT_COMPLEX_IM(x, ctx); + c = NFLOAT_COMPLEX_RE(y, ctx); + d = NFLOAT_COMPLEX_IM(y, ctx); + + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + return _nfloat_complex_mul_naive(r, s, a, b, c, d, ctx); + + if (NFLOAT_IS_ZERO(d)) + { + status = nfloat_mul(s, b, c, ctx); + status |= nfloat_mul(r, a, c, ctx); + return status; + } + + if (NFLOAT_IS_ZERO(b)) + { + status = nfloat_mul(s, a, d, ctx); + status |= nfloat_mul(r, a, c, ctx); + return status; + } + + if (NFLOAT_IS_ZERO(c)) + { + ulong t[NFLOAT_MAX_ALLOC]; + status = nfloat_mul(t, b, d, ctx); + status |= nfloat_mul(s, a, d, ctx); + status |= nfloat_neg(r, t, ctx); + return status; + } + + if (NFLOAT_IS_ZERO(a)) + { + ulong t[NFLOAT_MAX_ALLOC]; + status = nfloat_mul(t, b, d, ctx); + status |= nfloat_mul(s, b, c, ctx); + status |= nfloat_neg(r, t, ctx); + return status; + } + + if (NFLOAT_CTX_NLIMBS(ctx) < 12) + return _nfloat_complex_mul_standard(r, s, a, b, c, d, ctx); + else + return _nfloat_complex_mul_karatsuba(r, s, a, b, c, d, ctx); +} + +int +nfloat_complex_inv(nfloat_complex_ptr res, nfloat_complex_srcptr x, gr_ctx_t ctx) +{ + nfloat_srcptr a, b; + nfloat_ptr r, s; + int status; + + r = NFLOAT_COMPLEX_RE(res, ctx); + s = NFLOAT_COMPLEX_IM(res, ctx); + + a = NFLOAT_COMPLEX_RE(x, ctx); + b = NFLOAT_COMPLEX_IM(x, ctx); + + if (NFLOAT_IS_ZERO(b)) + { + status = nfloat_inv(r, a, ctx); + nfloat_zero(s, ctx); + return status; + } + + if (NFLOAT_IS_ZERO(a)) + { + status = nfloat_inv(s, b, ctx); + nfloat_neg(s, s, ctx); + nfloat_zero(r, ctx); + return status; + } + + ulong a2[NFLOAT_MAX_ALLOC]; + ulong b2[NFLOAT_MAX_ALLOC]; + ulong t[NFLOAT_MAX_ALLOC]; + + /* todo: improve */ + status = nfloat_sqr(a2, a, ctx); + status |= nfloat_sqr(b2, b, ctx); + status |= nfloat_add(t, a2, b2, ctx); + status |= nfloat_div(r, a, t, ctx); + status |= nfloat_div(s, b, t, ctx); + status |= nfloat_neg(s, s, ctx); + return status; +} + +int +nfloat_complex_div(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) +{ + nfloat_srcptr a, b, c, d; + nfloat_ptr r, s; + int status = GR_SUCCESS; + + r = NFLOAT_COMPLEX_RE(res, ctx); + s = NFLOAT_COMPLEX_IM(res, ctx); + + a = NFLOAT_COMPLEX_RE(x, ctx); + b = NFLOAT_COMPLEX_IM(x, ctx); + c = NFLOAT_COMPLEX_RE(y, ctx); + d = NFLOAT_COMPLEX_IM(y, ctx); + + /* todo: other special cases */ + + if (NFLOAT_IS_ZERO(d)) + { + if (NFLOAT_IS_ZERO(b)) + { + status = nfloat_div(r, a, c, ctx); + nfloat_zero(s, ctx); + } + else if (NFLOAT_IS_ZERO(a)) + { + status = nfloat_div(s, b, c, ctx); + nfloat_zero(r, ctx); + } + else + { + status = nfloat_div(s, b, c, ctx); + status |= nfloat_div(r, a, c, ctx); + } + } + else if (NFLOAT_IS_ZERO(c)) + { + if (NFLOAT_IS_ZERO(b)) + { + status = nfloat_div(s, a, d, ctx); + nfloat_neg(s, s, ctx); + nfloat_zero(r, ctx); + } + else if (NFLOAT_IS_ZERO(a)) + { + status = nfloat_div(r, b, d, ctx); + nfloat_zero(s, ctx); + } + else + { + status = nfloat_div(r, a, d, ctx); + status |= nfloat_div(s, b, d, ctx); + nfloat_swap(r, s, ctx); + nfloat_neg(s, s, ctx); + } + } + else + { + ulong c2[NFLOAT_MAX_ALLOC]; + ulong d2[NFLOAT_MAX_ALLOC]; + ulong t[NFLOAT_MAX_ALLOC]; + ulong u[2 * NFLOAT_MAX_ALLOC]; + + /* todo: improve */ + status = nfloat_sqr(c2, c, ctx); + status |= nfloat_sqr(d2, d, ctx); + status |= nfloat_add(t, c2, d2, ctx); + status |= nfloat_set(NFLOAT_COMPLEX_RE(u, ctx), c, ctx); + status |= nfloat_neg(NFLOAT_COMPLEX_IM(u, ctx), d, ctx); + status |= nfloat_complex_mul(res, x, u, ctx); + status |= nfloat_div(r, r, t, ctx); + status |= nfloat_div(s, s, t, ctx); + } + + return status; +} + +void +_nfloat_complex_vec_init(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) +{ + _nfloat_vec_init(res, 2 * len, ctx); +} + +void +_nfloat_complex_vec_clear(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) +{ + return; +} + +int +_nfloat_complex_vec_zero(nfloat_complex_ptr res, slong len, gr_ctx_t ctx) +{ + return _nfloat_vec_zero(res, 2 * len, ctx); +} + +int +_nfloat_complex_vec_set(nfloat_complex_ptr res, nfloat_complex_srcptr x, slong len, gr_ctx_t ctx) +{ + return _nfloat_vec_set(res, x, 2 * len, ctx); +} + +int +_nfloat_complex_vec_add(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx) +{ + return _nfloat_vec_add(res, x, y, 2 * len, ctx); +} + +int +_nfloat_complex_vec_sub(nfloat_complex_ptr res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, slong len, gr_ctx_t ctx) +{ + return _nfloat_vec_sub(res, x, y, 2 * len, ctx); +} + + +int _nfloat_complex_methods_initialized = 0; + +gr_static_method_table _nfloat_complex_methods; + +gr_method_tab_input _nfloat_complex_methods_input[] = +{ + {GR_METHOD_CTX_WRITE, (gr_funcptr) nfloat_ctx_write}, + {GR_METHOD_CTX_IS_RING, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_COMMUTATIVE_RING, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_INTEGRAL_DOMAIN, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FIELD, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_UNIQUE_FACTORIZATION_DOMAIN, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FINITE, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FINITE_CHARACTERISTIC, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_ALGEBRAICALLY_CLOSED, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_ORDERED_RING, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_EXACT, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_CANONICAL, + (gr_funcptr) gr_generic_ctx_predicate_false}, + + {GR_METHOD_CTX_HAS_REAL_PREC, (gr_funcptr) gr_generic_ctx_predicate_true}, + {GR_METHOD_CTX_SET_REAL_PREC, (gr_funcptr) _nfloat_ctx_set_real_prec}, + {GR_METHOD_CTX_GET_REAL_PREC, (gr_funcptr) _nfloat_ctx_get_real_prec}, + + {GR_METHOD_INIT, (gr_funcptr) nfloat_complex_init}, + {GR_METHOD_CLEAR, (gr_funcptr) nfloat_complex_clear}, + {GR_METHOD_SWAP, (gr_funcptr) nfloat_complex_swap}, + {GR_METHOD_SET_SHALLOW, (gr_funcptr) nfloat_complex_set}, + {GR_METHOD_RANDTEST, (gr_funcptr) nfloat_complex_randtest}, + {GR_METHOD_WRITE, (gr_funcptr) nfloat_complex_write}, + {GR_METHOD_ZERO, (gr_funcptr) nfloat_complex_zero}, + {GR_METHOD_ONE, (gr_funcptr) nfloat_complex_one}, + {GR_METHOD_NEG_ONE, (gr_funcptr) nfloat_complex_neg_one}, + {GR_METHOD_IS_ZERO, (gr_funcptr) nfloat_complex_is_zero}, + {GR_METHOD_IS_ONE, (gr_funcptr) nfloat_complex_is_one}, + {GR_METHOD_IS_NEG_ONE, (gr_funcptr) nfloat_complex_is_neg_one}, + {GR_METHOD_EQUAL, (gr_funcptr) nfloat_complex_equal}, + {GR_METHOD_SET, (gr_funcptr) nfloat_complex_set}, + {GR_METHOD_SET_SI, (gr_funcptr) nfloat_complex_set_si}, + {GR_METHOD_SET_UI, (gr_funcptr) nfloat_complex_set_ui}, + {GR_METHOD_SET_FMPZ, (gr_funcptr) nfloat_complex_set_fmpz}, + {GR_METHOD_SET_FMPQ, (gr_funcptr) nfloat_complex_set_fmpq}, + {GR_METHOD_SET_D, (gr_funcptr) nfloat_complex_set_d}, + {GR_METHOD_SET_STR, (gr_funcptr) gr_generic_set_str_ring_exponents}, +/* + {GR_METHOD_SET_OTHER, (gr_funcptr) nfloat_complex_set_other}, +*/ +/* + {GR_METHOD_GET_FMPZ, (gr_funcptr) nfloat_complex_get_fmpz}, + {GR_METHOD_GET_FMPQ, (gr_funcptr) nfloat_complex_get_fmpq}, + {GR_METHOD_GET_UI, (gr_funcptr) nfloat_complex_get_ui}, + {GR_METHOD_GET_SI, (gr_funcptr) nfloat_complex_get_si}, + {GR_METHOD_GET_D, (gr_funcptr) nfloat_complex_get_d}, +*/ + + {GR_METHOD_NEG, (gr_funcptr) nfloat_complex_neg}, + {GR_METHOD_ADD, (gr_funcptr) nfloat_complex_add}, +/* + {GR_METHOD_ADD_UI, (gr_funcptr) nfloat_complex_add_ui}, + {GR_METHOD_ADD_SI, (gr_funcptr) nfloat_complex_add_si}, + {GR_METHOD_ADD_FMPZ, (gr_funcptr) nfloat_complex_add_fmpz}, +*/ + {GR_METHOD_SUB, (gr_funcptr) nfloat_complex_sub}, +/* + {GR_METHOD_SUB_UI, (gr_funcptr) nfloat_complex_sub_ui}, + {GR_METHOD_SUB_SI, (gr_funcptr) nfloat_complex_sub_si}, + {GR_METHOD_SUB_FMPZ, (gr_funcptr) nfloat_complex_sub_fmpz}, +*/ + {GR_METHOD_MUL, (gr_funcptr) nfloat_complex_mul}, +/* + {GR_METHOD_MUL_UI, (gr_funcptr) nfloat_complex_mul_ui}, + {GR_METHOD_MUL_SI, (gr_funcptr) nfloat_complex_mul_si}, + {GR_METHOD_MUL_FMPZ, (gr_funcptr) nfloat_complex_mul_fmpz}, + {GR_METHOD_MUL_TWO, (gr_funcptr) nfloat_complex_mul_two}, +*/ +/* + {GR_METHOD_ADDMUL, (gr_funcptr) nfloat_complex_addmul}, + {GR_METHOD_SUBMUL, (gr_funcptr) nfloat_complex_submul}, +*/ + {GR_METHOD_SQR, (gr_funcptr) nfloat_complex_sqr}, + {GR_METHOD_DIV, (gr_funcptr) nfloat_complex_div}, +/* + {GR_METHOD_DIV_UI, (gr_funcptr) nfloat_complex_div_ui}, + {GR_METHOD_DIV_SI, (gr_funcptr) nfloat_complex_div_si}, +*/ +/* + {GR_METHOD_DIV_FMPZ, (gr_funcptr) nfloat_complex_div_fmpz}, +*/ + {GR_METHOD_INV, (gr_funcptr) nfloat_complex_inv}, +/* + {GR_METHOD_MUL_2EXP_SI, (gr_funcptr) nfloat_complex_mul_2exp_si}, +*/ +/* + {GR_METHOD_MUL_2EXP_FMPZ, (gr_funcptr) nfloat_complex_mul_2exp_fmpz}, + {GR_METHOD_SET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_complex_set_fmpz_2exp_fmpz}, + {GR_METHOD_GET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_complex_get_fmpz_2exp_fmpz}, +*/ + +/* + {GR_METHOD_POW, (gr_funcptr) nfloat_complex_pow}, +*/ +/* + {GR_METHOD_POW_UI, (gr_funcptr) nfloat_complex_pow_ui}, + {GR_METHOD_POW_SI, (gr_funcptr) nfloat_complex_pow_si}, + {GR_METHOD_POW_FMPZ, (gr_funcptr) nfloat_complex_pow_fmpz}, + {GR_METHOD_POW_FMPQ, (gr_funcptr) nfloat_complex_pow_fmpq}, +*/ +/* + {GR_METHOD_SQRT, (gr_funcptr) nfloat_complex_sqrt}, + {GR_METHOD_RSQRT, (gr_funcptr) nfloat_complex_rsqrt}, + + {GR_METHOD_POS_INF, (gr_funcptr) nfloat_complex_pos_inf}, + {GR_METHOD_NEG_INF, (gr_funcptr) nfloat_complex_neg_inf}, + {GR_METHOD_UINF, (gr_funcptr) gr_not_in_domain}, + {GR_METHOD_UNDEFINED, (gr_funcptr) nfloat_complex_nan}, + {GR_METHOD_UNKNOWN, (gr_funcptr) nfloat_complex_nan}, + + {GR_METHOD_FLOOR, (gr_funcptr) nfloat_complex_floor}, + {GR_METHOD_CEIL, (gr_funcptr) nfloat_complex_ceil}, + {GR_METHOD_TRUNC, (gr_funcptr) nfloat_complex_trunc}, + {GR_METHOD_NINT, (gr_funcptr) nfloat_complex_nint}, + + {GR_METHOD_ABS, (gr_funcptr) nfloat_complex_abs}, +*/ + {GR_METHOD_CONJ, (gr_funcptr) nfloat_complex_set}, + {GR_METHOD_RE, (gr_funcptr) nfloat_complex_set}, + {GR_METHOD_IM, (gr_funcptr) nfloat_complex_im}, +/* + {GR_METHOD_SGN, (gr_funcptr) nfloat_complex_sgn}, + {GR_METHOD_CSGN, (gr_funcptr) nfloat_complex_sgn}, + {GR_METHOD_CMP, (gr_funcptr) nfloat_complex_cmp}, + {GR_METHOD_CMPABS, (gr_funcptr) nfloat_complex_cmpabs}, +*/ + {GR_METHOD_I, (gr_funcptr) nfloat_complex_i}, + {GR_METHOD_PI, (gr_funcptr) nfloat_complex_pi}, +/* + {GR_METHOD_EXP, (gr_funcptr) nfloat_complex_exp}, + {GR_METHOD_EXPM1, (gr_funcptr) nfloat_complex_expm1}, + {GR_METHOD_LOG, (gr_funcptr) nfloat_complex_log}, + {GR_METHOD_LOG1P, (gr_funcptr) nfloat_complex_log1p}, + {GR_METHOD_SIN, (gr_funcptr) nfloat_complex_sin}, + {GR_METHOD_COS, (gr_funcptr) nfloat_complex_cos}, + {GR_METHOD_TAN, (gr_funcptr) nfloat_complex_tan}, + {GR_METHOD_SINH, (gr_funcptr) nfloat_complex_sinh}, + {GR_METHOD_COSH, (gr_funcptr) nfloat_complex_cosh}, + {GR_METHOD_TANH, (gr_funcptr) nfloat_complex_tanh}, + {GR_METHOD_ATAN, (gr_funcptr) nfloat_complex_atan}, + {GR_METHOD_GAMMA, (gr_funcptr) nfloat_complex_gamma}, + {GR_METHOD_ZETA, (gr_funcptr) nfloat_complex_zeta}, +*/ + + {GR_METHOD_VEC_INIT, (gr_funcptr) _nfloat_complex_vec_init}, + {GR_METHOD_VEC_CLEAR, (gr_funcptr) _nfloat_complex_vec_clear}, + {GR_METHOD_VEC_SET, (gr_funcptr) _nfloat_complex_vec_set}, + {GR_METHOD_VEC_ZERO, (gr_funcptr) _nfloat_complex_vec_zero}, + {GR_METHOD_VEC_ADD, (gr_funcptr) _nfloat_complex_vec_add}, + {GR_METHOD_VEC_SUB, (gr_funcptr) _nfloat_complex_vec_sub}, +/* + {GR_METHOD_VEC_MUL, (gr_funcptr) _nfloat_complex_vec_mul}, + {GR_METHOD_VEC_MUL_SCALAR, (gr_funcptr) _nfloat_complex_vec_mul_scalar}, + {GR_METHOD_VEC_ADDMUL_SCALAR, (gr_funcptr) _nfloat_complex_vec_addmul_scalar}, + {GR_METHOD_VEC_SUBMUL_SCALAR, (gr_funcptr) _nfloat_complex_vec_submul_scalar}, + {GR_METHOD_VEC_DOT, (gr_funcptr) _nfloat_complex_vec_dot}, + {GR_METHOD_VEC_DOT_REV, (gr_funcptr) _nfloat_complex_vec_dot_rev}, +*/ +/* + {GR_METHOD_POLY_MULLOW, (gr_funcptr) nfloat_complex_poly_mullow}, + {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_complex_poly_roots_other}, + {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_complex_mat_mul}, +*/ + {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, + {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, + + {0, (gr_funcptr) NULL}, +}; + +int +nfloat_complex_ctx_init(gr_ctx_t ctx, slong prec, int flags) +{ + slong nlimbs; + + if (prec <= 0 || prec > NFLOAT_MAX_LIMBS * FLINT_BITS) + return GR_UNABLE; + + nlimbs = (prec + FLINT_BITS - 1) / FLINT_BITS; + + ctx->which_ring = GR_CTX_NFLOAT_COMPLEX; + ctx->sizeof_elem = 2 * sizeof(ulong) * (nlimbs + NFLOAT_HEADER_LIMBS); + ctx->size_limit = WORD_MAX; + + NFLOAT_CTX_NLIMBS(ctx) = nlimbs; + NFLOAT_CTX_FLAGS(ctx) = flags; + NFLOAT_CTX_RND(ctx) = 0; + + ctx->methods = _nfloat_complex_methods; + + if (!_nfloat_complex_methods_initialized) + { + gr_method_tab_init(_nfloat_complex_methods, _nfloat_complex_methods_input); + _nfloat_complex_methods_initialized = 1; + } + + return GR_SUCCESS; +} diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index a0ce5e8aa2..250f15bf8d 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -50,6 +50,7 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_WRITE, (gr_funcptr) nfloat_write}, {GR_METHOD_ZERO, (gr_funcptr) nfloat_zero}, {GR_METHOD_ONE, (gr_funcptr) nfloat_one}, + {GR_METHOD_NEG_ONE, (gr_funcptr) nfloat_neg_one}, {GR_METHOD_IS_ZERO, (gr_funcptr) nfloat_is_zero}, {GR_METHOD_IS_ONE, (gr_funcptr) nfloat_is_one}, {GR_METHOD_IS_NEG_ONE, (gr_funcptr) nfloat_is_neg_one}, @@ -92,9 +93,7 @@ gr_method_tab_input _nfloat_methods_input[] = */ {GR_METHOD_ADDMUL, (gr_funcptr) nfloat_addmul}, {GR_METHOD_SUBMUL, (gr_funcptr) nfloat_submul}, -/* {GR_METHOD_SQR, (gr_funcptr) nfloat_sqr}, -*/ {GR_METHOD_DIV, (gr_funcptr) nfloat_div}, {GR_METHOD_DIV_UI, (gr_funcptr) nfloat_div_ui}, {GR_METHOD_DIV_SI, (gr_funcptr) nfloat_div_si}, @@ -214,8 +213,18 @@ nfloat_ctx_init(gr_ctx_t ctx, slong prec, int flags) int nfloat_ctx_write(gr_stream_t out, gr_ctx_t ctx) { - gr_stream_write(out, "Floating-point numbers with prec = "); - gr_stream_write_si(out, NFLOAT_CTX_PREC(ctx)); - gr_stream_write(out, " (nfloat)"); - return GR_SUCCESS; + if (ctx->which_ring == GR_CTX_NFLOAT_COMPLEX) + { + gr_stream_write(out, "Complex floating-point numbers with prec = "); + gr_stream_write_si(out, NFLOAT_CTX_PREC(ctx)); + gr_stream_write(out, " (nfloat_complex)"); + return GR_SUCCESS; + } + else + { + gr_stream_write(out, "Floating-point numbers with prec = "); + gr_stream_write_si(out, NFLOAT_CTX_PREC(ctx)); + gr_stream_write(out, " (nfloat)"); + return GR_SUCCESS; + } } diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index 21ccac0630..e9fe020e4f 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -513,6 +513,12 @@ nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx) } else { + if (!LIMB_MSB_IS_SET(NFLOAT_D(x)[NFLOAT_CTX_NLIMBS(ctx) - 1])) + { + flint_printf("bad nfloat!\n"); + flint_abort(); + } + arf_set_mpn(res, NFLOAT_D(x), NFLOAT_CTX_NLIMBS(ctx), NFLOAT_SGNBIT(x)); arf_mul_2exp_si(res, res, NFLOAT_EXP(x) - FLINT_BITS * NFLOAT_CTX_NLIMBS(ctx)); } @@ -2163,6 +2169,71 @@ nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) return GR_SUCCESS; } +int +nfloat_sqr(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + mp_limb_pair_t mul_res; + slong nlimbs; + + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_zero(res, ctx); + else + return nfloat_abs(res, x, ctx); + } + + nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + if (nlimbs == 1) + { + ulong hi, lo; + + umul_ppmm(hi, lo, NFLOAT_D(x)[0], NFLOAT_D(x)[0]); + + if (LIMB_MSB_IS_SET(hi)) + { + NFLOAT_D(res)[0] = hi; + NFLOAT_EXP(res) = 2 * NFLOAT_EXP(x); + } + else + { + NFLOAT_D(res)[0] = (hi << 1) | (lo >> (FLINT_BITS - 1)); + NFLOAT_EXP(res) = 2 * NFLOAT_EXP(x) - 1; + } + } + else if (nlimbs == 2) + { + ulong r3, r2, r1, FLINT_SET_BUT_UNUSED(r0); + + FLINT_MPN_SQR_2X2(r3, r2, r1, r0, NFLOAT_D(x)[1], NFLOAT_D(x)[0]); + + if (LIMB_MSB_IS_SET(r3)) + { + NFLOAT_D(res)[0] = r2; + NFLOAT_D(res)[1] = r3; + NFLOAT_EXP(res) = 2 * NFLOAT_EXP(x); + } + else + { + NFLOAT_D(res)[0] = (r2 << 1) | (r1 >> (FLINT_BITS - 1)); + NFLOAT_D(res)[1] = (r3 << 1) | (r2 >> (FLINT_BITS - 1)); + NFLOAT_EXP(res) = 2 * NFLOAT_EXP(x) - 1; + } + } + else + { + /* todo: sqrhigh_normalised */ + mul_res = flint_mpn_mulhigh_normalised2(NFLOAT_D(res), NFLOAT_D(x), NFLOAT_D(x), NFLOAT_CTX_NLIMBS(ctx)); + NFLOAT_EXP(res) = 2 * NFLOAT_EXP(x) - mul_res.m2; + } + + NFLOAT_SGNBIT(res) = 0; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +/* todo: squaring */ int _nfloat_vec_mul_1(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) { @@ -2219,6 +2290,7 @@ _nfloat_vec_mul_1(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, g return status; } +/* todo: squaring */ int _nfloat_vec_mul_2(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) { @@ -2277,6 +2349,7 @@ _nfloat_vec_mul_2(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, g return status; } +/* todo: squaring */ int _nfloat_vec_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) { diff --git a/src/nfloat/profile/p-vs_acf.c b/src/nfloat/profile/p-vs_acf.c new file mode 100644 index 0000000000..83fde0defb --- /dev/null +++ b/src/nfloat/profile/p-vs_acf.c @@ -0,0 +1,165 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "fmpz.h" +#include "gr.h" +#include "gr_special.h" +#include "gr_vec.h" +#include "gr_mat.h" +#include "acf.h" +#include "nfloat.h" +#include "profiler.h" +#include "double_extras.h" + +#if 1 +#undef TIMEIT_END_REPEAT +#define TIMEIT_END_REPEAT(__timer, __reps) \ + } \ + timeit_stop(__timer); \ + if (__timer->cpu >= 100) \ + break; \ + __reps *= 10; \ + } \ + } while (0); +#endif + +int main() +{ + gr_ptr vec1, vec2, vec3; + gr_ptr x, I; + gr_ctx_t ctx; + int which; + slong i, n; + slong prec; + double __, t, acf_tadd = 0.0, acf_tmul = 0.0, acf_tmul_scalar = 0.0, acf_taddmul_scalar = 0.0, acf_tsum = 0.0, acf_tprod = 0.0, acf_tdot = 0.0; + double nfloat_tadd = 0.0, nfloat_tmul = 0.0, nfloat_tmul_scalar = 0.0, nfloat_taddmul_scalar = 0.0, nfloat_tsum = 0.0, nfloat_tprod = 0.0, nfloat_tdot = 0.0; + + flint_printf(" _gr_vec_add _gr_vec_mul _gr_vec_mul_scalar _gr_vec_addmul_scalar _gr_vec_sum _gr_vec_product _gr_vec_dot\n"); + + for (prec = 64; prec <= 4096; prec = prec < 256 ? prec + 64 : prec * 2) + { + flint_printf("prec = %wd\n", prec); + + for (n = 10; n <= 100; n *= 10) + { + for (which = 0; which < 2; which++) + { + flint_rand_t state; + flint_rand_init(state); + + if (which == 0) + gr_ctx_init_complex_float_acf(ctx, prec); + else + nfloat_complex_ctx_init(ctx, prec, 0); + + x = gr_heap_init(ctx); + I = gr_heap_init(ctx); + vec1 = gr_heap_init_vec(n, ctx); + vec2 = gr_heap_init_vec(n, ctx); + vec3 = gr_heap_init_vec(n, ctx); + + GR_MUST_SUCCEED(gr_i(I, ctx)); + + for (i = 0; i < n; i++) + { + gr_ptr v1 = GR_ENTRY(vec1, i, ctx->sizeof_elem); + gr_ptr v2 = GR_ENTRY(vec2, i, ctx->sizeof_elem); + + GR_MUST_SUCCEED(gr_set_si(v1, 1 + n_randint(state, 1000), ctx)); + if (n_randint(state, 2)) + GR_MUST_SUCCEED(gr_neg(v1, v1, ctx)); + GR_MUST_SUCCEED(gr_mul(v1, v1, I, ctx)); + GR_MUST_SUCCEED(gr_add_si(v1, v1, 1 + n_randint(state, 1000), ctx)); + if (n_randint(state, 2)) + GR_MUST_SUCCEED(gr_neg(v1, v1, ctx)); + GR_MUST_SUCCEED(gr_div_ui(v1, v1, 1 + n_randint(state, 1000), ctx)); + + GR_MUST_SUCCEED(gr_set_si(v2, 1 + n_randint(state, 1000), ctx)); + if (n_randint(state, 2)) + GR_MUST_SUCCEED(gr_neg(v2, v2, ctx)); + GR_MUST_SUCCEED(gr_mul(v2, v2, I, ctx)); + GR_MUST_SUCCEED(gr_add_si(v2, v2, 1 + n_randint(state, 1000), ctx)); + if (n_randint(state, 2)) + GR_MUST_SUCCEED(gr_neg(v2, v2, ctx)); + GR_MUST_SUCCEED(gr_div_ui(v2, v2, 1 + n_randint(state, 1000), ctx)); + +/* + if (n_randint(state, 10) == 0) + GR_MUST_SUCCEED(gr_zero(v2, ctx)); +*/ + } + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_add(vec3, vec1, vec2, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tadd = t; else nfloat_tadd = t; + (void) __; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_mul(vec3, vec1, vec2, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tmul = t; else nfloat_tmul = t; + (void) __; + + GR_MUST_SUCCEED(gr_pi(x, ctx)); + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_mul_scalar(vec3, vec1, n, x, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tmul_scalar = t; else nfloat_tmul_scalar = t; + (void) __; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_addmul_scalar(vec3, vec1, n, x, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_taddmul_scalar = t; else nfloat_taddmul_scalar = t; + (void) __; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_sum(x, vec1, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tsum = t; else nfloat_tsum = t; + (void) __; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_product(x, vec1, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tprod = t; else nfloat_tprod = t; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_dot(x, NULL, 0, vec1, vec2, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) acf_tdot = t; else nfloat_tdot = t; + + gr_heap_clear(x, ctx); + gr_heap_clear(I, ctx); + gr_heap_clear_vec(vec1, n, ctx); + gr_heap_clear_vec(vec2, n, ctx); + gr_heap_clear_vec(vec3, n, ctx); + + flint_rand_clear(state); + } + + flint_printf("n = %4wd ", n); + flint_printf(" %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx)\n", + nfloat_tadd, acf_tadd / nfloat_tadd, + nfloat_tmul, acf_tmul / nfloat_tmul, + nfloat_tmul_scalar, acf_tmul_scalar / nfloat_tmul_scalar, + nfloat_taddmul_scalar, acf_taddmul_scalar / nfloat_taddmul_scalar, + nfloat_tsum, acf_tsum / nfloat_tsum, + nfloat_tprod, acf_tprod / nfloat_tprod, + nfloat_tdot, acf_tdot / nfloat_tdot); + } + } + + return 0; +} diff --git a/src/nfloat/profile/p-vs_arf.c b/src/nfloat/profile/p-vs_arf.c index 4d459dce20..7af7fbc1c9 100644 --- a/src/nfloat/profile/p-vs_arf.c +++ b/src/nfloat/profile/p-vs_arf.c @@ -45,7 +45,7 @@ int main() flint_printf(" _gr_vec_add _gr_vec_mul _gr_vec_mul_scalar _gr_vec_addmul_scalar _gr_vec_sum _gr_vec_product _gr_vec_dot\n"); - for (prec = 64; prec <= 2048; prec = prec < 256 ? prec + 64 : prec * 2) + for (prec = 64; prec <= 4096; prec = prec < 256 ? prec + 64 : prec * 2) { flint_printf("prec = %wd\n", prec); diff --git a/src/nfloat/test/main.c b/src/nfloat/test/main.c index 9c694c98f2..0a129221f1 100644 --- a/src/nfloat/test/main.c +++ b/src/nfloat/test/main.c @@ -14,6 +14,7 @@ #include "t-add_sub_n.c" #include "t-addmul_submul.c" #include "t-nfloat.c" +#include "t-nfloat_complex.c" /* Array of test functions ***************************************************/ @@ -22,6 +23,7 @@ test_struct tests[] = TEST_FUNCTION(add_sub_n), TEST_FUNCTION(addmul_submul), TEST_FUNCTION(nfloat), + TEST_FUNCTION(nfloat_complex), }; /* main function *************************************************************/ diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c index ac275b76ee..05fa2b3977 100644 --- a/src/nfloat/test/t-nfloat.c +++ b/src/nfloat/test/t-nfloat.c @@ -12,6 +12,7 @@ #include "test_helpers.h" #include "fmpq.h" #include "arf.h" +#include "acf.h" #include "gr_vec.h" #include "gr_special.h" #include "nfloat.h" @@ -94,7 +95,6 @@ gr_test_approx_unary_op(gr_ctx_t R, gr_method_unary_op op, gr_ctx_t R_ref, gr_sr if (status == GR_SUCCESS) { status |= gr_set_other(rel_err, b, R, R_ref); - status |= gr_sub(rel_err, b_ref, rel_err, R_ref); status |= gr_div(rel_err, rel_err, b_ref, R_ref); status |= gr_abs(rel_err, rel_err, R_ref); @@ -534,6 +534,46 @@ TEST_FUNCTION_START(nfloat, state) gr_ctx_t ctx; gr_ctx_t ctx2; slong prec; + slong iter; + + for (prec = NFLOAT_MIN_LIMBS * FLINT_BITS; prec <= NFLOAT_MAX_LIMBS * FLINT_BITS; prec += FLINT_BITS) + { + nfloat_complex_ctx_init(ctx, prec, 0); + + gr_test_floating_point(ctx, 100 * flint_test_multiplier(), 0); + + { + gr_ptr tol1, tol; + slong i, reps; + + gr_ctx_init_complex_acb(ctx2, prec + 32); + + tol1 = gr_heap_init(ctx); + tol = gr_heap_init(ctx2); + + GR_IGNORE(gr_one(tol, ctx2)); + GR_IGNORE(gr_mul_2exp_si(tol, tol, -prec + 3, ctx2)); + + reps = (prec <= 256 ? 10000 : 1) * flint_test_multiplier(); + + for (i = 0; i < reps; i++) + { + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_add, ctx2, tol, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_sub, ctx2, tol, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_mul, ctx2, tol, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_div, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_neg, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sqr, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_inv, ctx2, tol, state, 0); + } + + gr_heap_clear(tol1, ctx); + gr_heap_clear(tol, ctx2); + gr_ctx_clear(ctx2); + } + + gr_ctx_clear(ctx); + } for (prec = NFLOAT_MIN_LIMBS * FLINT_BITS; prec <= NFLOAT_MAX_LIMBS * FLINT_BITS; prec += FLINT_BITS) { @@ -631,6 +671,7 @@ TEST_FUNCTION_START(nfloat, state) gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_neg, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_abs, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sgn, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sqr, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_inv, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sqrt, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_rsqrt, ctx2, tol, state, 0); @@ -705,5 +746,70 @@ TEST_FUNCTION_START(nfloat, state) gr_ctx_clear(ctx); } + nfloat_ctx_init(ctx, FLINT_BITS, 0); + + for (iter = 0; iter < 10000 * flint_test_multiplier(); iter++) + { + ulong x[3]; + ulong r[NFLOAT_MAX_ALLOC]; + ulong s[NFLOAT_MAX_ALLOC]; + int s1, s2; + slong exp; + int sgn; + + x[0] = n_randint(state, 2) ? 0 : n_randtest(state); + x[1] = n_randint(state, 2) ? 0 : n_randtest(state); + x[2] = n_randint(state, 2) ? 0 : n_randtest(state); + exp = (slong) n_randint(state, 100) - 100; + sgn = n_randint(state, 2); + + s1 = nfloat_1_set_3_2exp(r, x[2], x[1], x[0], exp, sgn, ctx); + s2 = nfloat_set_mpn_2exp(s, x, 3, exp, sgn, ctx); + + if (s1 != s2 || nfloat_equal(r, s, ctx) == T_FALSE) + { + flint_printf("FAIL: nfloat_1_set_3_2exp\n"); + flint_mpn_debug(x, 3); + gr_println(r, ctx); + gr_println(s, ctx); + flint_abort(); + } + } + + gr_ctx_clear(ctx); + + nfloat_ctx_init(ctx, 2 * FLINT_BITS, 0); + + for (iter = 0; iter < 10000 * flint_test_multiplier(); iter++) + { + ulong x[4]; + ulong r[NFLOAT_MAX_ALLOC]; + ulong s[NFLOAT_MAX_ALLOC]; + int s1, s2; + slong exp; + int sgn; + + x[0] = n_randint(state, 2) ? 0 : n_randtest(state); + x[1] = n_randint(state, 2) ? 0 : n_randtest(state); + x[2] = n_randint(state, 2) ? 0 : n_randtest(state); + x[3] = n_randint(state, 2) ? 0 : n_randtest(state); + exp = (slong) n_randint(state, 100) - 100; + sgn = n_randint(state, 2); + + s1 = nfloat_2_set_4_2exp(r, x[3], x[2], x[1], x[0], exp, sgn, ctx); + s2 = nfloat_set_mpn_2exp(s, x, 4, exp, sgn, ctx); + + if (s1 != s2 || nfloat_equal(r, s, ctx) == T_FALSE) + { + flint_printf("FAIL: nfloat_2_set_4_2exp\n"); + flint_mpn_debug(x, 4); + gr_println(r, ctx); + gr_println(s, ctx); + flint_abort(); + } + } + + gr_ctx_clear(ctx); + TEST_FUNCTION_END(state); } diff --git a/src/nfloat/test/t-nfloat_complex.c b/src/nfloat/test/t-nfloat_complex.c new file mode 100644 index 0000000000..f800dbe883 --- /dev/null +++ b/src/nfloat/test/t-nfloat_complex.c @@ -0,0 +1,29 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "test_helpers.h" +#include "fmpq.h" +#include "arf.h" +#include "gr_vec.h" +#include "gr_special.h" +#include "nfloat.h" + +TEST_FUNCTION_START(nfloat_complex, state) +{ +/* + gr_ctx_t ctx; + gr_ctx_t ctx2; + slong prec; + slong iter; +*/ + + TEST_FUNCTION_END(state); +} diff --git a/src/python/flint_ctypes.py b/src/python/flint_ctypes.py index f331978b72..6ed1d337dd 100644 --- a/src/python/flint_ctypes.py +++ b/src/python/flint_ctypes.py @@ -4713,13 +4713,39 @@ def _default_context(): return _nfloat_class - class RealFloat_nfloat(gr_ctx): def __init__(self, prec=128): gr_ctx.__init__(self) libflint.nfloat_ctx_init(self._ref, prec, 0) self._elem_type = get_nfloat_class(prec) +@functools.cache +def get_nfloat_complex_class(prec): + n = (prec + FLINT_BITS - 1) // FLINT_BITS + prec = n * FLINT_BITS + + class _nfloat_complex_struct(ctypes.Structure): + _fields_ = [('val', c_ulong * (2 * (n + 2)))] + + _nfloat_complex_struct.__qualname__ = _nfloat_complex_struct.__name__ = ("nfloat" + str(prec) + "_complex_struct") + + class _nfloat_complex_class(gr_elem): + _struct_type = _nfloat_complex_struct + + @staticmethod + def _default_context(): + raise NotImplementedError + + _nfloat_complex_class.__qualname__ = _nfloat_complex_class.__name__ = ("nfloat" + str(prec) + "_complex") + + return _nfloat_complex_class + +class ComplexFloat_nfloat_complex(gr_ctx): + def __init__(self, prec=128): + gr_ctx.__init__(self) + libflint.nfloat_complex_ctx_init(self._ref, prec, 0) + self._elem_type = get_nfloat_complex_class(prec) +