From 8645932a86d1b1d169df027aa8d0ed95e42b10d1 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Sat, 20 Jul 2024 12:27:57 +0200 Subject: [PATCH 1/3] proper cmpabs for acb, acf --- src/gr/acb.c | 103 ++++++++++++++++++++++++++++++++++--- src/gr/acf.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 234 insertions(+), 11 deletions(-) diff --git a/src/gr/acb.c b/src/gr/acb.c index 0a14fbd78e..253ae17b50 100644 --- a/src/gr/acb.c +++ b/src/gr/acb.c @@ -987,21 +987,108 @@ _gr_acb_cmp(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx) } } +int +_gr_arb_cmpabs(int * res, const arb_t x, const arb_t y, const gr_ctx_t ctx); + int _gr_acb_cmpabs(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx) { - acb_t t, u; + if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y))) + { + arb_srcptr a = acb_realref(x); + arb_srcptr c = acb_realref(y); - *t = *x; - *u = *y; + /* OK; ignores the context object */ + return _gr_arb_cmpabs(res, a, c, ctx); + } + else + { + slong prec = ACB_CTX_PREC(ctx); + int status = GR_SUCCESS; - if (arf_sgn(arb_midref(acb_realref(t))) < 0) - ARF_NEG(arb_midref(acb_realref(t))); + arb_srcptr a = acb_realref(x); + arb_srcptr b = acb_imagref(x); + arb_srcptr c = acb_realref(y); + arb_srcptr d = acb_imagref(y); + + mag_t xlo, xhi, ylo, yhi, t; + + mag_init(xlo); + mag_init(xhi); + mag_init(ylo); + mag_init(yhi); + mag_init(t); + + arb_get_mag_lower(xlo, a); + arb_get_mag_lower(t, b); + mag_mul_lower(xlo, xlo, xlo); + mag_mul_lower(t, t, t); + mag_add_lower(xlo, xlo, t); + + arb_get_mag_lower(ylo, c); + arb_get_mag_lower(t, d); + mag_mul_lower(ylo, ylo, ylo); + mag_mul_lower(t, t, t); + mag_add_lower(ylo, ylo, t); + + arb_get_mag(xhi, a); + arb_get_mag(t, b); + mag_mul(xhi, xhi, xhi); + mag_mul(t, t, t); + mag_add(xhi, xhi, t); + + arb_get_mag(yhi, c); + arb_get_mag(t, d); + mag_mul(yhi, yhi, yhi); + mag_mul(t, t, t); + mag_add(yhi, yhi, t); + + if (mag_cmp(xhi, ylo) < 0) + { + *res = -1; + status = GR_SUCCESS; + } + else if (mag_cmp(xlo, yhi) > 0) + { + *res = 1; + status = GR_SUCCESS; + } + else + { + arb_t t, u; - if (arf_sgn(arb_midref(acb_realref(u))) < 0) - ARF_NEG(arb_midref(acb_realref(u))); + arb_init(t); + arb_init(u); - return _gr_acb_cmp(res, t, u, ctx); + arb_mul(t, a, a, prec); + arb_addmul(t, b, b, prec); + arb_mul(u, c, c, prec); + arb_addmul(u, d, d, prec); + + if ((arb_is_exact(t) && arb_is_exact(u)) || !arb_overlaps(t, u)) + { + *res = arf_cmp(arb_midref(t), arb_midref(u)); + status = GR_SUCCESS; + } + else + { + /* todo: worth it to do an exact computation? */ + *res = 0; + status = GR_UNABLE; + } + + arb_clear(t); + arb_clear(u); + } + + mag_clear(xlo); + mag_clear(xhi); + mag_clear(ylo); + mag_clear(yhi); + mag_clear(t); + + return status; + } } int diff --git a/src/gr/acf.c b/src/gr/acf.c index a2ec128a3c..dc5be3b228 100644 --- a/src/gr/acf.c +++ b/src/gr/acf.c @@ -666,13 +666,149 @@ _gr_acf_cmp(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx) return GR_SUCCESS; } +/* ignores ctx, so we can pass in the acf context */ +int +_gr_arf_cmpabs(int * res, const arf_t x, const arf_t y, const gr_ctx_t ctx); + +#include "double_extras.h" + int _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx) { - if (!arf_is_zero(acf_imagref(x)) || !arf_is_zero(acf_imagref(y))) - return GR_UNABLE; + arf_srcptr a = acf_realref(x); + arf_srcptr b = acf_imagref(x); + arf_srcptr c = acf_realref(y); + arf_srcptr d = acf_imagref(y); + + if (arf_is_zero(b)) + { + if (arf_is_zero(d)) + return _gr_arf_cmpabs(res, a, c, ctx); + if (arf_is_zero(c)) + return _gr_arf_cmpabs(res, a, d, ctx); + } + + if (arf_is_zero(a)) + { + if (arf_is_zero(d)) + return _gr_arf_cmpabs(res, b, c, ctx); + if (arf_is_zero(c)) + return _gr_arf_cmpabs(res, b, d, ctx); + } + + if (arf_is_zero(c)) + { + if (arf_is_zero(a)) + return _gr_arf_cmpabs(res, b, d, ctx); + if (arf_is_zero(b)) + return _gr_arf_cmpabs(res, a, d, ctx); + } + + if (arf_is_zero(d)) + { + if (arf_is_zero(a)) + return _gr_arf_cmpabs(res, b, c, ctx); + if (arf_is_zero(b)) + return _gr_arf_cmpabs(res, a, c, ctx); + } + + if (ARF_IS_LAGOM(a) && ARF_IS_LAGOM(b) && ARF_IS_LAGOM(c) && ARF_IS_LAGOM(d)) + { + slong aexp, bexp, cexp, dexp, xexp, yexp, exp; + + aexp = arf_is_zero(a) ? WORD_MIN : ARF_EXP(a); + bexp = arf_is_zero(b) ? WORD_MIN : ARF_EXP(b); + cexp = arf_is_zero(c) ? WORD_MIN : ARF_EXP(c); + dexp = arf_is_zero(d) ? WORD_MIN : ARF_EXP(d); + + /* 0.5 * 2^xexp <= |x| < sqrt(2) * 2^xexp */ + xexp = FLINT_MAX(aexp, bexp); + /* 0.5 * 2^yexp <= |y| < sqrt(2) * 2^yexp */ + yexp = FLINT_MAX(cexp, dexp); + + if (xexp + 2 < yexp) + { + *res = -1; + return GR_SUCCESS; + } + + if (xexp > yexp + 2) + { + *res = 1; + return GR_SUCCESS; + } + + exp = FLINT_MAX(xexp, yexp); + + double tt, xx = 0.0, yy = 0.0; + nn_srcptr xp; + slong xn; + + if (aexp >= exp - 53) + { + ARF_GET_MPN_READONLY(xp, xn, a); + tt = d_mul_2exp_inrange(xp[xn - 1], aexp - exp - FLINT_BITS); + xx += tt * tt; + } + + if (bexp >= exp - 53) + { + ARF_GET_MPN_READONLY(xp, xn, b); + tt = d_mul_2exp_inrange(xp[xn - 1], bexp - exp - FLINT_BITS); + xx += tt * tt; + } + + if (cexp >= exp - 53) + { + ARF_GET_MPN_READONLY(xp, xn, c); + tt = d_mul_2exp_inrange(xp[xn - 1], cexp - exp - FLINT_BITS); + yy += tt * tt; + } + + if (dexp >= exp - 53) + { + ARF_GET_MPN_READONLY(xp, xn, d); + tt = d_mul_2exp_inrange(xp[xn - 1], dexp - exp - FLINT_BITS); + yy += tt * tt; + } + + if (xx < yy * 0.999999) + { + *res = -1; + return GR_SUCCESS; + } + + if (xx * 0.999999 > yy) + { + *res = 1; + return GR_SUCCESS; + } + } + + arf_struct s[5]; + + arf_init(s + 0); + arf_init(s + 1); + arf_init(s + 2); + arf_init(s + 3); + arf_init(s + 4); + + arf_mul(s + 0, a, a, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 1, b, b, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 2, c, c, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 3, d, d, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_neg(s + 2, s + 2); + arf_neg(s + 3, s + 3); + arf_sum(s + 4, s, 4, 30, ARF_RND_DOWN); + + *res = arf_sgn(s + 4); + + arf_clear(s + 0); + arf_clear(s + 1); + arf_clear(s + 2); + arf_clear(s + 3); + arf_clear(s + 4); - *res = arf_cmpabs(acf_realref(x), acf_realref(y)); return GR_SUCCESS; } From 373c6ec43aa44807c6aac3d92f05ff43505e80b3 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Sat, 20 Jul 2024 12:29:12 +0200 Subject: [PATCH 2/3] cmpabs & linear algebra tuning for nfloat, nfloat_complex --- doc/source/nfloat.rst | 7 ++ src/gr/test_ring.c | 7 +- src/nfloat.h | 9 +++ src/nfloat/complex.c | 141 ++++++++++++++++++++++++++++++++++++-- src/nfloat/ctx.c | 3 + src/nfloat/mat.c | 156 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 315 insertions(+), 8 deletions(-) create mode 100644 src/nfloat/mat.c diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index ffdb6638e4..39d425ce52 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -324,6 +324,10 @@ Matrix functions Different implementations of matrix multiplication. +.. function:: int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx) + int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx) + int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx) + Internal functions ------------------------------------------------------------------------------- @@ -417,3 +421,6 @@ real pairs. int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx) int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx) + int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx) + int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx) diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c index c584484772..d1a22be592 100644 --- a/src/gr/test_ring.c +++ b/src/gr/test_ring.c @@ -2904,7 +2904,7 @@ gr_test_ordered_ring_cmpabs(gr_ctx_t R, flint_rand_t state, int test_flags) status = GR_TEST_FAIL; } - if (status & GR_DOMAIN && !(status & GR_UNABLE)) + if (gr_ctx_is_ordered_ring(R) == T_TRUE && (status & GR_DOMAIN && !(status & GR_UNABLE))) { status = GR_TEST_FAIL; } @@ -4315,10 +4315,9 @@ gr_test_ring(gr_ctx_t R, slong iters, int test_flags) gr_test_iter(R, state, "pow: ui/si/fmpz/fmpq", gr_test_pow_type_variants, iters, test_flags & (~GR_TEST_ALWAYS_ABLE)); if (gr_ctx_is_ordered_ring(R) == T_TRUE) - { gr_test_iter(R, state, "ordered_ring_cmp", gr_test_ordered_ring_cmp, iters, test_flags); - gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags); - } + + gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags); gr_test_iter(R, state, "numerator_denominator", gr_test_numerator_denominator, iters, test_flags); gr_test_iter(R, state, "complex_parts", gr_test_complex_parts, iters, test_flags); diff --git a/src/nfloat.h b/src/nfloat.h index f0fdcf2985..12b7363e66 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -458,6 +458,11 @@ int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ct int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx); int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx); +int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx); +int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx); + + /* Complex numbers */ /* Note: we use the same context data for real and complex rings (only which_ring and sizeof_elem differ). This allows us to call @@ -569,6 +574,10 @@ int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx); +int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx); +int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx); + #ifdef __cplusplus } #endif diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c index ca80a50e6f..8b38bd81cb 100644 --- a/src/nfloat/complex.c +++ b/src/nfloat/complex.c @@ -15,6 +15,7 @@ #include "gr_generic.h" #include "acf.h" #include "acb.h" +#include "mag.h" #include "nfloat.h" static int @@ -1542,17 +1543,146 @@ nfloat_complex_cmp(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, return nfloat_cmp(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx); } +#include "double_extras.h" + int nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx) { + nfloat_srcptr a, b, c, d; + slong aexp, bexp, cexp, dexp, xexp, yexp, exp; + slong xn = NFLOAT_CTX_NLIMBS(ctx); + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) return GR_UNABLE; - if (!NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(x, ctx)) || - !NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(y, ctx))) - return GR_UNABLE; + a = NFLOAT_COMPLEX_RE(x, ctx); + b = NFLOAT_COMPLEX_IM(x, ctx); + c = NFLOAT_COMPLEX_RE(y, ctx); + d = NFLOAT_COMPLEX_IM(y, ctx); + + if (NFLOAT_IS_ZERO(b)) + { + if (NFLOAT_IS_ZERO(d)) + return nfloat_cmpabs(res, a, c, ctx); + if (NFLOAT_IS_ZERO(c)) + return nfloat_cmpabs(res, a, d, ctx); + } + + if (NFLOAT_IS_ZERO(a)) + { + if (NFLOAT_IS_ZERO(d)) + return nfloat_cmpabs(res, b, c, ctx); + if (NFLOAT_IS_ZERO(c)) + return nfloat_cmpabs(res, b, d, ctx); + } + + if (NFLOAT_IS_ZERO(c)) + { + if (NFLOAT_IS_ZERO(a)) + return nfloat_cmpabs(res, b, d, ctx); + if (NFLOAT_IS_ZERO(b)) + return nfloat_cmpabs(res, a, d, ctx); + } + + if (NFLOAT_IS_ZERO(d)) + { + if (NFLOAT_IS_ZERO(a)) + return nfloat_cmpabs(res, b, c, ctx); + if (NFLOAT_IS_ZERO(b)) + return nfloat_cmpabs(res, a, c, ctx); + } + + aexp = NFLOAT_EXP(a); + bexp = NFLOAT_EXP(b); + cexp = NFLOAT_EXP(c); + dexp = NFLOAT_EXP(d); + + /* 0.5 * 2^xexp <= |x| < sqrt(2) * 2^xexp */ + xexp = FLINT_MAX(aexp, bexp); + /* 0.5 * 2^yexp <= |y| < sqrt(2) * 2^yexp */ + yexp = FLINT_MAX(cexp, dexp); + + if (xexp + 2 < yexp) + { + *res = -1; + return GR_SUCCESS; + } + + if (xexp > yexp + 2) + { + *res = 1; + return GR_SUCCESS; + } + + exp = FLINT_MAX(xexp, yexp); + + double tt, xx = 0.0, yy = 0.0; + + if (aexp >= exp - 53) + { + tt = d_mul_2exp_inrange(NFLOAT_D(a)[xn - 1], aexp - exp - FLINT_BITS); + xx += tt * tt; + } - return nfloat_cmpabs(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx); + if (bexp >= exp - 53) + { + tt = d_mul_2exp_inrange(NFLOAT_D(b)[xn - 1], bexp - exp - FLINT_BITS); + xx += tt * tt; + } + + if (cexp >= exp - 53) + { + tt = d_mul_2exp_inrange(NFLOAT_D(c)[xn - 1], cexp - exp - FLINT_BITS); + yy += tt * tt; + } + + if (dexp >= exp - 53) + { + tt = d_mul_2exp_inrange(NFLOAT_D(d)[xn - 1], dexp - exp - FLINT_BITS); + yy += tt * tt; + } + + if (xx < yy * 0.999999) + { + *res = -1; + return GR_SUCCESS; + } + + if (xx * 0.999999 > yy) + { + *res = 1; + return GR_SUCCESS; + } + + arf_struct s[5]; + + arf_init(s + 0); + arf_init(s + 1); + arf_init(s + 2); + arf_init(s + 3); + arf_init(s + 4); + + nfloat_get_arf(s + 0, a, ctx); + nfloat_get_arf(s + 1, b, ctx); + nfloat_get_arf(s + 2, c, ctx); + nfloat_get_arf(s + 3, d, ctx); + + arf_mul(s + 0, s + 0, s + 0, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 1, s + 1, s + 1, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 2, s + 2, s + 2, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_mul(s + 3, s + 3, s + 3, ARF_PREC_EXACT, ARF_RND_DOWN); + arf_neg(s + 2, s + 2); + arf_neg(s + 3, s + 3); + arf_sum(s + 4, s, 4, 30, ARF_RND_DOWN); + *res = arf_sgn(s + 4); + + arf_clear(s + 0); + arf_clear(s + 1); + arf_clear(s + 2); + arf_clear(s + 3); + arf_clear(s + 4); + + return GR_SUCCESS; } int @@ -1794,6 +1924,9 @@ gr_method_tab_input _nfloat_complex_methods_input[] = {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_complex_poly_roots_other}, */ {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_complex_mat_mul}, + {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL, (gr_funcptr) nfloat_complex_mat_nonsingular_solve_tril}, + {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU, (gr_funcptr) nfloat_complex_mat_nonsingular_solve_triu}, + {GR_METHOD_MAT_LU, (gr_funcptr) nfloat_complex_mat_lu}, {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index 6178b877cc..c73f1a1a1e 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -175,6 +175,9 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other}, */ {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul}, + {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL, (gr_funcptr) nfloat_mat_nonsingular_solve_tril}, + {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU, (gr_funcptr) nfloat_mat_nonsingular_solve_triu}, + {GR_METHOD_MAT_LU, (gr_funcptr) nfloat_mat_lu}, {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, diff --git a/src/nfloat/mat.c b/src/nfloat/mat.c new file mode 100644 index 0000000000..25d3ff271f --- /dev/null +++ b/src/nfloat/mat.c @@ -0,0 +1,156 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "gr.h" +#include "nfloat.h" +#include "gr_mat.h" + +int +nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, + const gr_mat_t B, int unit, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 256) + cutoff = 96; + else if (prec <= 512) + cutoff = 16; + else if (prec <= 576) + cutoff = 32; + else if (prec <= 1536) + cutoff = 8; + else if (prec <= 2176) + cutoff = 7; + else + cutoff = 6; + + if (B->r < cutoff || B->c < cutoff) + return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx); + else + return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx); +} + +int +nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, + const gr_mat_t B, int unit, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 256) + cutoff = 96; + else if (prec <= 512) + cutoff = 16; + else if (prec <= 576) + cutoff = 32; + else if (prec <= 1536) + cutoff = 8; + else if (prec <= 2176) + cutoff = 7; + else + cutoff = 6; + + if (B->r < cutoff || B->c < cutoff) + return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx); + else + return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx); +} + +int +nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, + const gr_mat_t B, int unit, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 192) + cutoff = 64; + else if (prec <= 256) + cutoff = 16; + else if (prec <= 384) + cutoff = 7; + else if (prec == 576) + cutoff = 16; + else + cutoff = 6; + + if (B->r < cutoff || B->c < cutoff) + return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx); + else + return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx); +} + +int +nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, + const gr_mat_t B, int unit, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 192) + cutoff = 64; + else if (prec <= 256) + cutoff = 16; + else if (prec <= 384) + cutoff = 7; + else if (prec == 576) + cutoff = 16; + else + cutoff = 6; + + if (B->r < cutoff || B->c < cutoff) + return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx); + else + return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx); +} + +int +nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 256) + cutoff = 32; + else if (prec <= 576) + cutoff = 28; + else if (prec <= 768) + cutoff = 16; + else if (prec <= 1536) + cutoff = 12; + else if (prec <= 2560) + cutoff = 8; + else + cutoff = 7; + + if (A->r < cutoff || A->c < cutoff) + return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx); + else + return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx); +} + +int +nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx) +{ + slong cutoff, prec = NFLOAT_CTX_PREC(ctx); + + if (prec <= 256) + cutoff = 12; + else if (prec <= 512) + cutoff = 8; + else if (prec <= 576) + cutoff = 16; + else if (prec <= 1024) + cutoff = 7; + else + cutoff = 6; + + if (A->r < cutoff || A->c < cutoff) + return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx); + else + return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx); +} From 99bd8468fb1a2bf3af0909bc009632e1c60deea7 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Sat, 20 Jul 2024 15:32:59 +0200 Subject: [PATCH 3/3] fixup logic --- src/gr/acf.c | 21 ++++++++------------- src/nfloat/complex.c | 21 ++++++++------------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/src/gr/acf.c b/src/gr/acf.c index dc5be3b228..8d0f7c08fe 100644 --- a/src/gr/acf.c +++ b/src/gr/acf.c @@ -686,6 +686,11 @@ _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx) return _gr_arf_cmpabs(res, a, c, ctx); if (arf_is_zero(c)) return _gr_arf_cmpabs(res, a, d, ctx); + if (arf_is_zero(a)) + { + *res = -1; + return GR_SUCCESS; + } } if (arf_is_zero(a)) @@ -696,20 +701,10 @@ _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx) return _gr_arf_cmpabs(res, b, d, ctx); } - if (arf_is_zero(c)) + if (arf_is_zero(c) && arf_is_zero(d)) { - if (arf_is_zero(a)) - return _gr_arf_cmpabs(res, b, d, ctx); - if (arf_is_zero(b)) - return _gr_arf_cmpabs(res, a, d, ctx); - } - - if (arf_is_zero(d)) - { - if (arf_is_zero(a)) - return _gr_arf_cmpabs(res, b, c, ctx); - if (arf_is_zero(b)) - return _gr_arf_cmpabs(res, a, c, ctx); + *res = 1; + return GR_SUCCESS; } if (ARF_IS_LAGOM(a) && ARF_IS_LAGOM(b) && ARF_IS_LAGOM(c) && ARF_IS_LAGOM(d)) diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c index 8b38bd81cb..e9fff75bbd 100644 --- a/src/nfloat/complex.c +++ b/src/nfloat/complex.c @@ -1566,6 +1566,11 @@ nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr return nfloat_cmpabs(res, a, c, ctx); if (NFLOAT_IS_ZERO(c)) return nfloat_cmpabs(res, a, d, ctx); + if (NFLOAT_IS_ZERO(a)) + { + *res = -1; + return GR_SUCCESS; + } } if (NFLOAT_IS_ZERO(a)) @@ -1576,20 +1581,10 @@ nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr return nfloat_cmpabs(res, b, d, ctx); } - if (NFLOAT_IS_ZERO(c)) - { - if (NFLOAT_IS_ZERO(a)) - return nfloat_cmpabs(res, b, d, ctx); - if (NFLOAT_IS_ZERO(b)) - return nfloat_cmpabs(res, a, d, ctx); - } - - if (NFLOAT_IS_ZERO(d)) + if (NFLOAT_IS_ZERO(c) && NFLOAT_IS_ZERO(d)) { - if (NFLOAT_IS_ZERO(a)) - return nfloat_cmpabs(res, b, c, ctx); - if (NFLOAT_IS_ZERO(b)) - return nfloat_cmpabs(res, a, c, ctx); + *res = 1; + return GR_SUCCESS; } aexp = NFLOAT_EXP(a);