cmpabs & linear algebra tuning for nfloat, nfloat_complex

flintlib · Jul 20, 2024 · 373c6ec · 373c6ec
1 parent 8645932
commit 373c6ec
Show file tree

Hide file tree

Showing 6 changed files with 315 additions and 8 deletions.
diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst
@@ -324,6 +324,10 @@ Matrix functions
 
     Different implementations of matrix multiplication.
 
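+.. function:: int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+
+    Triangular solving and LU factorization tuned for ``nfloat`` contexts.
+    Each function selects between the classical and the recursive generic
+    ``gr_mat`` algorithm, using a dimension cutoff that depends on the
+    precision of *ctx*. Note that the triangular matrix argument is named
+    *L* in both solving functions, including the ``triu`` variant.
+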
 Internal functions
 -------------------------------------------------------------------------------
 
@@ -417,3 +421,6 @@ real pairs.
               int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
               int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
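+              int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+
+    The triangular solving and LU functions select between the classical
+    and the recursive generic ``gr_mat`` algorithm, using a dimension cutoff
+    that depends on the precision of *ctx*.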
diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c
@@ -2904,7 +2904,7 @@ gr_test_ordered_ring_cmpabs(gr_ctx_t R, flint_rand_t state, int test_flags)
         status = GR_TEST_FAIL;
     }
 
-    if (status & GR_DOMAIN && !(status & GR_UNABLE))
+    if (gr_ctx_is_ordered_ring(R) == T_TRUE && (status & GR_DOMAIN && !(status & GR_UNABLE)))
     {
         status = GR_TEST_FAIL;
     }
@@ -4315,10 +4315,9 @@ gr_test_ring(gr_ctx_t R, slong iters, int test_flags)
     gr_test_iter(R, state, "pow: ui/si/fmpz/fmpq", gr_test_pow_type_variants, iters, test_flags & (~GR_TEST_ALWAYS_ABLE));
 
     if (gr_ctx_is_ordered_ring(R) == T_TRUE)
-    {
         gr_test_iter(R, state, "ordered_ring_cmp", gr_test_ordered_ring_cmp, iters, test_flags);
-        gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags);
-    }
+
+    gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags);
 
     gr_test_iter(R, state, "numerator_denominator", gr_test_numerator_denominator, iters, test_flags);
     gr_test_iter(R, state, "complex_parts", gr_test_complex_parts, iters, test_flags);

diff --git a/src/nfloat.h b/src/nfloat.h
@@ -458,6 +458,11 @@ int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ct
 int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx);
 int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
+/* Triangular solving and LU factorization for real nfloat matrices
+   (defined in nfloat/mat.c). Each function picks between the classical and
+   the recursive gr_mat algorithm using a precision-dependent size cutoff. */
+int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx);
+
+
 /* Complex numbers */
 /* Note: we use the same context data for real and complex rings
    (only which_ring and sizeof_elem differ). This allows us to call
@@ -569,6 +574,10 @@ int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B,
 int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
+/* Triangular solving and LU factorization for complex nfloat matrices
+   (defined in nfloat/mat.c). Each function picks between the classical and
+   the recursive gr_mat algorithm using a precision-dependent size cutoff. */
+int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx);
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c
@@ -15,6 +15,7 @@
 #include "gr_generic.h"
 #include "acf.h"
 #include "acb.h"
+#include "mag.h"
 #include "nfloat.h"
 
 static int
@@ -1542,17 +1543,146 @@ nfloat_complex_cmp(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y,
     return nfloat_cmp(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx);
 }
 
+#include "double_extras.h"
+
 int
 nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx)
 {
+    nfloat_srcptr a, b, c, d;
+    slong aexp, bexp, cexp, dexp, xexp, yexp, exp;
+    slong xn = NFLOAT_CTX_NLIMBS(ctx);
+
     if (NFLOAT_CTX_HAS_INF_NAN(ctx))
         return GR_UNABLE;
 
-    if (!NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(x, ctx)) ||
-        !NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(y, ctx)))
-        return GR_UNABLE;
+    a = NFLOAT_COMPLEX_RE(x, ctx);
+    b = NFLOAT_COMPLEX_IM(x, ctx);
+    c = NFLOAT_COMPLEX_RE(y, ctx);
+    d = NFLOAT_COMPLEX_IM(y, ctx);
+
+    if (NFLOAT_IS_ZERO(b))
+    {
+        if (NFLOAT_IS_ZERO(d))
+            return nfloat_cmpabs(res, a, c, ctx);
+        if (NFLOAT_IS_ZERO(c))
+            return nfloat_cmpabs(res, a, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(a))
+    {
+        if (NFLOAT_IS_ZERO(d))
+            return nfloat_cmpabs(res, b, c, ctx);
+        if (NFLOAT_IS_ZERO(c))
+            return nfloat_cmpabs(res, b, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(c))
+    {
+        if (NFLOAT_IS_ZERO(a))
+            return nfloat_cmpabs(res, b, d, ctx);
+        if (NFLOAT_IS_ZERO(b))
+            return nfloat_cmpabs(res, a, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(d))
+    {
+        if (NFLOAT_IS_ZERO(a))
+            return nfloat_cmpabs(res, b, c, ctx);
+        if (NFLOAT_IS_ZERO(b))
+            return nfloat_cmpabs(res, a, c, ctx);
+    }
+
+    aexp = NFLOAT_EXP(a);
+    bexp = NFLOAT_EXP(b);
+    cexp = NFLOAT_EXP(c);
+    dexp = NFLOAT_EXP(d);
+
+    /* 0.5 * 2^xexp <= |x| < sqrt(2) * 2^xexp */
+    xexp = FLINT_MAX(aexp, bexp);
+    /* 0.5 * 2^yexp <= |y| < sqrt(2) * 2^yexp */
+    yexp = FLINT_MAX(cexp, dexp);
+
+    if (xexp + 2 < yexp)
+    {
+        *res = -1;
+        return GR_SUCCESS;
+    }
+
+    if (xexp > yexp + 2)
+    {
+        *res = 1;
+        return GR_SUCCESS;
+    }
+
+    exp = FLINT_MAX(xexp, yexp);
+
+    double tt, xx = 0.0, yy = 0.0;
+
+    if (aexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(a)[xn - 1], aexp - exp - FLINT_BITS);
+        xx += tt * tt;
+    }
 
-    return nfloat_cmpabs(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx);
+    if (bexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(b)[xn - 1], bexp - exp - FLINT_BITS);
+        xx += tt * tt;
+    }
+
+    if (cexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(c)[xn - 1], cexp - exp - FLINT_BITS);
+        yy += tt * tt;
+    }
+
+    if (dexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(d)[xn - 1], dexp - exp - FLINT_BITS);
+        yy += tt * tt;
+    }
+
+    if (xx < yy * 0.999999)
+    {
+        *res = -1;
+        return GR_SUCCESS;
+    }
+
+    if (xx * 0.999999 > yy)
+    {
+        *res = 1;
+        return GR_SUCCESS;
+    }
+
+    arf_struct s[5];
+
+    arf_init(s + 0);
+    arf_init(s + 1);
+    arf_init(s + 2);
+    arf_init(s + 3);
+    arf_init(s + 4);
+
+    nfloat_get_arf(s + 0, a, ctx);
+    nfloat_get_arf(s + 1, b, ctx);
+    nfloat_get_arf(s + 2, c, ctx);
+    nfloat_get_arf(s + 3, d, ctx);
+
+    arf_mul(s + 0, s + 0, s + 0, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 1, s + 1, s + 1, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 2, s + 2, s + 2, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 3, s + 3, s + 3, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_neg(s + 2, s + 2);
+    arf_neg(s + 3, s + 3);
+    arf_sum(s + 4, s, 4, 30, ARF_RND_DOWN);
+    *res = arf_sgn(s + 4);
+
+    arf_clear(s + 0);
+    arf_clear(s + 1);
+    arf_clear(s + 2);
+    arf_clear(s + 3);
+    arf_clear(s + 4);
+
+    return GR_SUCCESS;
 }
 
 int
@@ -1794,6 +1924,9 @@ gr_method_tab_input _nfloat_complex_methods_input[] =
     {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_complex_poly_roots_other},
 */
     {GR_METHOD_MAT_MUL,         (gr_funcptr) nfloat_complex_mat_mul},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL,  (gr_funcptr) nfloat_complex_mat_nonsingular_solve_tril},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU,  (gr_funcptr) nfloat_complex_mat_nonsingular_solve_triu},
+    {GR_METHOD_MAT_LU,                      (gr_funcptr) nfloat_complex_mat_lu},
     {GR_METHOD_MAT_DET,         (gr_funcptr) gr_mat_det_generic_field},
     {GR_METHOD_MAT_FIND_NONZERO_PIVOT,     (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs},
 

diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c
@@ -175,6 +175,9 @@ gr_method_tab_input _nfloat_methods_input[] =
     {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other},
 */
     {GR_METHOD_MAT_MUL,         (gr_funcptr) nfloat_mat_mul},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL,  (gr_funcptr) nfloat_mat_nonsingular_solve_tril},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU,  (gr_funcptr) nfloat_mat_nonsingular_solve_triu},
+    {GR_METHOD_MAT_LU,                      (gr_funcptr) nfloat_mat_lu},
     {GR_METHOD_MAT_DET,         (gr_funcptr) gr_mat_det_generic_field},
     {GR_METHOD_MAT_FIND_NONZERO_PIVOT,     (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs},
 

diff --git a/src/nfloat/mat.c b/src/nfloat/mat.c
@@ -0,0 +1,156 @@
+/*
+    Copyright (C) 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "gr.h"
+#include "nfloat.h"
+#include "gr_mat.h"
+
+/* Triangular solve (tril variant) for real nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to the
+   recursive gr_mat algorithm when both dimensions of B reach the cutoff,
+   and to the classical algorithm otherwise. Returns the status of the
+   algorithm that was called. */
+int
+nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs */
+    threshold = (bits <= 256)  ? 96 :
+                (bits <= 512)  ? 16 :
+                (bits <= 576)  ? 32 :
+                (bits <= 1536) ? 8  :
+                (bits <= 2176) ? 7  : 6;
+
+    if (B->r >= threshold && B->c >= threshold)
+        return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx);
+
+    return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx);
+}
+
+/* Triangular solve (triu variant) for real nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to the
+   recursive gr_mat algorithm when both dimensions of B reach the cutoff,
+   and to the classical algorithm otherwise. Returns the status of the
+   algorithm that was called. The triangular matrix argument is named L
+   here as well. */
+int
+nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs */
+    threshold = (bits <= 256)  ? 96 :
+                (bits <= 512)  ? 16 :
+                (bits <= 576)  ? 32 :
+                (bits <= 1536) ? 8  :
+                (bits <= 2176) ? 7  : 6;
+
+    if (B->r >= threshold && B->c >= threshold)
+        return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx);
+
+    return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx);
+}
+
+/* Triangular solve (tril variant) for complex nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to the
+   recursive gr_mat algorithm when both dimensions of B reach the cutoff,
+   and to the classical algorithm otherwise. Returns the status of the
+   algorithm that was called. */
+int
+nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs; a precision of exactly 576 bits has its
+       own entry. */
+    threshold = (bits <= 192) ? 64 :
+                (bits <= 256) ? 16 :
+                (bits <= 384) ? 7  :
+                (bits == 576) ? 16 : 6;
+
+    if (B->r >= threshold && B->c >= threshold)
+        return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx);
+
+    return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx);
+}
+
+/* Triangular solve (triu variant) for complex nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to the
+   recursive gr_mat algorithm when both dimensions of B reach the cutoff,
+   and to the classical algorithm otherwise. Returns the status of the
+   algorithm that was called. The triangular matrix argument is named L
+   here as well. */
+int
+nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs; a precision of exactly 576 bits has its
+       own entry. */
+    threshold = (bits <= 192) ? 64 :
+                (bits <= 256) ? 16 :
+                (bits <= 384) ? 7  :
+                (bits == 576) ? 16 : 6;
+
+    if (B->r >= threshold && B->c >= threshold)
+        return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx);
+
+    return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx);
+}
+
+/* LU factorization for real nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to
+   gr_mat_lu_recursive when both dimensions of A reach the cutoff, and to
+   gr_mat_lu_classical otherwise. All arguments are passed through
+   unchanged and the status of the algorithm that was called is returned. */
+int
+nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs */
+    threshold = (bits <= 256)  ? 32 :
+                (bits <= 576)  ? 28 :
+                (bits <= 768)  ? 16 :
+                (bits <= 1536) ? 12 :
+                (bits <= 2560) ? 8  : 7;
+
+    if (A->r >= threshold && A->c >= threshold)
+        return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx);
+
+    return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx);
+}
+
+/* LU factorization for complex nfloat matrices.
+   Looks up a cutoff from the context precision and forwards to
+   gr_mat_lu_recursive when both dimensions of A reach the cutoff, and to
+   gr_mat_lu_classical otherwise. All arguments are passed through
+   unchanged and the status of the algorithm that was called is returned. */
+int
+nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+{
+    const slong bits = NFLOAT_CTX_PREC(ctx);
+    slong threshold;
+
+    /* Precision-dependent cutoffs */
+    threshold = (bits <= 256)  ? 12 :
+                (bits <= 512)  ? 8  :
+                (bits <= 576)  ? 16 :
+                (bits <= 1024) ? 7  : 6;
+
+    if (A->r >= threshold && A->c >= threshold)
+        return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx);
+
+    return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx);
+}