From 8645932a86d1b1d169df027aa8d0ed95e42b10d1 Mon Sep 17 00:00:00 2001
From: Fredrik Johansson <fredrik.johansson@gmail.com>
Date: Sat, 20 Jul 2024 12:27:57 +0200
Subject: [PATCH 1/3] proper cmpabs for acb, acf

---
 src/gr/acb.c | 103 ++++++++++++++++++++++++++++++++++---
 src/gr/acf.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 234 insertions(+), 11 deletions(-)

diff --git a/src/gr/acb.c b/src/gr/acb.c
index 0a14fbd78e..253ae17b50 100644
--- a/src/gr/acb.c
+++ b/src/gr/acb.c
@@ -987,21 +987,108 @@ _gr_acb_cmp(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx)
     }
 }
 
+int
+_gr_arb_cmpabs(int * res, const arb_t x, const arb_t y, const gr_ctx_t ctx);
+
 int
 _gr_acb_cmpabs(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx)
 {
-    acb_t t, u;
+    if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y)))
+    {
+        arb_srcptr a = acb_realref(x);
+        arb_srcptr c = acb_realref(y);
 
-    *t = *x;
-    *u = *y;
+        /* OK; ignores the context object */
+        return _gr_arb_cmpabs(res, a, c, ctx);
+    }
+    else
+    {
+        slong prec = ACB_CTX_PREC(ctx);
+        int status = GR_SUCCESS;
 
-    if (arf_sgn(arb_midref(acb_realref(t))) < 0)
-        ARF_NEG(arb_midref(acb_realref(t)));
+        arb_srcptr a = acb_realref(x);
+        arb_srcptr b = acb_imagref(x);
+        arb_srcptr c = acb_realref(y);
+        arb_srcptr d = acb_imagref(y);
+
+        mag_t xlo, xhi, ylo, yhi, t;
+
+        mag_init(xlo);
+        mag_init(xhi);
+        mag_init(ylo);
+        mag_init(yhi);
+        mag_init(t);
+
+        arb_get_mag_lower(xlo, a);
+        arb_get_mag_lower(t, b);
+        mag_mul_lower(xlo, xlo, xlo);
+        mag_mul_lower(t, t, t);
+        mag_add_lower(xlo, xlo, t);
+
+        arb_get_mag_lower(ylo, c);
+        arb_get_mag_lower(t, d);
+        mag_mul_lower(ylo, ylo, ylo);
+        mag_mul_lower(t, t, t);
+        mag_add_lower(ylo, ylo, t);
+
+        arb_get_mag(xhi, a);
+        arb_get_mag(t, b);
+        mag_mul(xhi, xhi, xhi);
+        mag_mul(t, t, t);
+        mag_add(xhi, xhi, t);
+
+        arb_get_mag(yhi, c);
+        arb_get_mag(t, d);
+        mag_mul(yhi, yhi, yhi);
+        mag_mul(t, t, t);
+        mag_add(yhi, yhi, t);
+
+        if (mag_cmp(xhi, ylo) < 0)
+        {
+            *res = -1;
+            status = GR_SUCCESS;
+        }
+        else if (mag_cmp(xlo, yhi) > 0)
+        {
+            *res = 1;
+            status = GR_SUCCESS;
+        }
+        else
+        {
+            arb_t t, u;
 
-    if (arf_sgn(arb_midref(acb_realref(u))) < 0)
-        ARF_NEG(arb_midref(acb_realref(u)));
+            arb_init(t);
+            arb_init(u);
 
-    return _gr_acb_cmp(res, t, u, ctx);
+            arb_mul(t, a, a, prec);
+            arb_addmul(t, b, b, prec);
+            arb_mul(u, c, c, prec);
+            arb_addmul(u, d, d, prec);
+
+            if ((arb_is_exact(t) && arb_is_exact(u)) || !arb_overlaps(t, u))
+            {
+                *res = arf_cmp(arb_midref(t), arb_midref(u));
+                status = GR_SUCCESS;
+            }
+            else
+            {
+                /* todo: worth it to do an exact computation? */
+                *res = 0;
+                status = GR_UNABLE;
+            }
+
+            arb_clear(t);
+            arb_clear(u);
+        }
+
+        mag_clear(xlo);
+        mag_clear(xhi);
+        mag_clear(ylo);
+        mag_clear(yhi);
+        mag_clear(t);
+
+        return status;
+    }
 }
 
 int
diff --git a/src/gr/acf.c b/src/gr/acf.c
index a2ec128a3c..dc5be3b228 100644
--- a/src/gr/acf.c
+++ b/src/gr/acf.c
@@ -666,13 +666,149 @@ _gr_acf_cmp(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx)
     return GR_SUCCESS;
 }
 
+/* ignores ctx, so we can pass in the acf context */
+int
+_gr_arf_cmpabs(int * res, const arf_t x, const arf_t y, const gr_ctx_t ctx);
+
+#include "double_extras.h"
+
 int
 _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx)
 {
-    if (!arf_is_zero(acf_imagref(x)) || !arf_is_zero(acf_imagref(y)))
-        return GR_UNABLE;
+    arf_srcptr a = acf_realref(x);
+    arf_srcptr b = acf_imagref(x);
+    arf_srcptr c = acf_realref(y);
+    arf_srcptr d = acf_imagref(y);
+
+    if (arf_is_zero(b))
+    {
+        if (arf_is_zero(d))
+            return _gr_arf_cmpabs(res, a, c, ctx);
+        if (arf_is_zero(c))
+            return _gr_arf_cmpabs(res, a, d, ctx);
+    }
+
+    if (arf_is_zero(a))
+    {
+        if (arf_is_zero(d))
+            return _gr_arf_cmpabs(res, b, c, ctx);
+        if (arf_is_zero(c))
+            return _gr_arf_cmpabs(res, b, d, ctx);
+    }
+
+    if (arf_is_zero(c))
+    {
+        if (arf_is_zero(a))
+            return _gr_arf_cmpabs(res, b, d, ctx);
+        if (arf_is_zero(b))
+            return _gr_arf_cmpabs(res, a, d, ctx);
+    }
+
+    if (arf_is_zero(d))
+    {
+        if (arf_is_zero(a))
+            return _gr_arf_cmpabs(res, b, c, ctx);
+        if (arf_is_zero(b))
+            return _gr_arf_cmpabs(res, a, c, ctx);
+    }
+
+    if (ARF_IS_LAGOM(a) && ARF_IS_LAGOM(b) && ARF_IS_LAGOM(c) && ARF_IS_LAGOM(d))
+    {
+        slong aexp, bexp, cexp, dexp, xexp, yexp, exp;
+
+        aexp = arf_is_zero(a) ? WORD_MIN : ARF_EXP(a);
+        bexp = arf_is_zero(b) ? WORD_MIN : ARF_EXP(b);
+        cexp = arf_is_zero(c) ? WORD_MIN : ARF_EXP(c);
+        dexp = arf_is_zero(d) ? WORD_MIN : ARF_EXP(d);
+
+        /* 0.5 * 2^xexp <= |x| < sqrt(2) * 2^xexp */
+        xexp = FLINT_MAX(aexp, bexp);
+        /* 0.5 * 2^yexp <= |y| < sqrt(2) * 2^yexp */
+        yexp = FLINT_MAX(cexp, dexp);
+
+        if (xexp + 2 < yexp)
+        {
+            *res = -1;
+            return GR_SUCCESS;
+        }
+
+        if (xexp > yexp + 2)
+        {
+            *res = 1;
+            return GR_SUCCESS;
+        }
+
+        exp = FLINT_MAX(xexp, yexp);
+
+        double tt, xx = 0.0, yy = 0.0;
+        nn_srcptr xp;
+        slong xn;
+
+        if (aexp >= exp - 53)
+        {
+            ARF_GET_MPN_READONLY(xp, xn, a);
+            tt = d_mul_2exp_inrange(xp[xn - 1], aexp - exp - FLINT_BITS);
+            xx += tt * tt;
+        }
+
+        if (bexp >= exp - 53)
+        {
+            ARF_GET_MPN_READONLY(xp, xn, b);
+            tt = d_mul_2exp_inrange(xp[xn - 1], bexp - exp - FLINT_BITS);
+            xx += tt * tt;
+        }
+
+        if (cexp >= exp - 53)
+        {
+            ARF_GET_MPN_READONLY(xp, xn, c);
+            tt = d_mul_2exp_inrange(xp[xn - 1], cexp - exp - FLINT_BITS);
+            yy += tt * tt;
+        }
+
+        if (dexp >= exp - 53)
+        {
+            ARF_GET_MPN_READONLY(xp, xn, d);
+            tt = d_mul_2exp_inrange(xp[xn - 1], dexp - exp - FLINT_BITS);
+            yy += tt * tt;
+        }
+
+        if (xx < yy * 0.999999)
+        {
+            *res = -1;
+            return GR_SUCCESS;
+        }
+
+        if (xx * 0.999999 > yy)
+        {
+            *res = 1;
+            return GR_SUCCESS;
+        }
+    }
+
+    arf_struct s[5];
+
+    arf_init(s + 0);
+    arf_init(s + 1);
+    arf_init(s + 2);
+    arf_init(s + 3);
+    arf_init(s + 4);
+
+    arf_mul(s + 0, a, a, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 1, b, b, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 2, c, c, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 3, d, d, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_neg(s + 2, s + 2);
+    arf_neg(s + 3, s + 3);
+    arf_sum(s + 4, s, 4, 30, ARF_RND_DOWN);
+
+    *res = arf_sgn(s + 4);
+
+    arf_clear(s + 0);
+    arf_clear(s + 1);
+    arf_clear(s + 2);
+    arf_clear(s + 3);
+    arf_clear(s + 4);
 
-    *res = arf_cmpabs(acf_realref(x), acf_realref(y));
     return GR_SUCCESS;
 }
 

From 373c6ec43aa44807c6aac3d92f05ff43505e80b3 Mon Sep 17 00:00:00 2001
From: Fredrik Johansson <fredrik.johansson@gmail.com>
Date: Sat, 20 Jul 2024 12:29:12 +0200
Subject: [PATCH 2/3] cmpabs & linear algebra tuning for nfloat, nfloat_complex

---
 doc/source/nfloat.rst |   7 ++
 src/gr/test_ring.c    |   7 +-
 src/nfloat.h          |   9 +++
 src/nfloat/complex.c  | 141 ++++++++++++++++++++++++++++++++++++--
 src/nfloat/ctx.c      |   3 +
 src/nfloat/mat.c      | 156 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 315 insertions(+), 8 deletions(-)
 create mode 100644 src/nfloat/mat.c

diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst
index ffdb6638e4..39d425ce52 100644
--- a/doc/source/nfloat.rst
+++ b/doc/source/nfloat.rst
@@ -324,6 +324,10 @@ Matrix functions
 
     Different implementations of matrix multiplication.
 
+.. function:: int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+
 Internal functions
 -------------------------------------------------------------------------------
 
@@ -417,3 +421,6 @@ real pairs.
               int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
               int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+              int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx)
+              int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c
index c584484772..d1a22be592 100644
--- a/src/gr/test_ring.c
+++ b/src/gr/test_ring.c
@@ -2904,7 +2904,7 @@ gr_test_ordered_ring_cmpabs(gr_ctx_t R, flint_rand_t state, int test_flags)
         status = GR_TEST_FAIL;
     }
 
-    if (status & GR_DOMAIN && !(status & GR_UNABLE))
+    if (gr_ctx_is_ordered_ring(R) == T_TRUE && (status & GR_DOMAIN && !(status & GR_UNABLE)))
     {
         status = GR_TEST_FAIL;
     }
@@ -4315,10 +4315,9 @@ gr_test_ring(gr_ctx_t R, slong iters, int test_flags)
     gr_test_iter(R, state, "pow: ui/si/fmpz/fmpq", gr_test_pow_type_variants, iters, test_flags & (~GR_TEST_ALWAYS_ABLE));
 
     if (gr_ctx_is_ordered_ring(R) == T_TRUE)
-    {
         gr_test_iter(R, state, "ordered_ring_cmp", gr_test_ordered_ring_cmp, iters, test_flags);
-        gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags);
-    }
+
+    gr_test_iter(R, state, "ordered_ring_cmpabs", gr_test_ordered_ring_cmpabs, iters, test_flags);
 
     gr_test_iter(R, state, "numerator_denominator", gr_test_numerator_denominator, iters, test_flags);
     gr_test_iter(R, state, "complex_parts", gr_test_complex_parts, iters, test_flags);
diff --git a/src/nfloat.h b/src/nfloat.h
index f0fdcf2985..12b7363e66 100644
--- a/src/nfloat.h
+++ b/src/nfloat.h
@@ -458,6 +458,11 @@ int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ct
 int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx);
 int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
+int nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx);
+
+
 /* Complex numbers */
 /* Note: we use the same context data for real and complex rings
    (only which_ring and sizeof_elem differ). This allows us to call
@@ -569,6 +574,10 @@ int nfloat_complex_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B,
 int nfloat_complex_mat_mul_reorder(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 int nfloat_complex_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
+int nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L, const gr_mat_t B, int unit, gr_ctx_t ctx);
+int nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c
index ca80a50e6f..8b38bd81cb 100644
--- a/src/nfloat/complex.c
+++ b/src/nfloat/complex.c
@@ -15,6 +15,7 @@
 #include "gr_generic.h"
 #include "acf.h"
 #include "acb.h"
+#include "mag.h"
 #include "nfloat.h"
 
 static int
@@ -1542,17 +1543,146 @@ nfloat_complex_cmp(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y,
     return nfloat_cmp(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx);
 }
 
+#include "double_extras.h"
+
 int
 nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr y, gr_ctx_t ctx)
 {
+    nfloat_srcptr a, b, c, d;
+    slong aexp, bexp, cexp, dexp, xexp, yexp, exp;
+    slong xn = NFLOAT_CTX_NLIMBS(ctx);
+
     if (NFLOAT_CTX_HAS_INF_NAN(ctx))
         return GR_UNABLE;
 
-    if (!NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(x, ctx)) ||
-        !NFLOAT_IS_ZERO(NFLOAT_COMPLEX_IM(y, ctx)))
-        return GR_UNABLE;
+    a = NFLOAT_COMPLEX_RE(x, ctx);
+    b = NFLOAT_COMPLEX_IM(x, ctx);
+    c = NFLOAT_COMPLEX_RE(y, ctx);
+    d = NFLOAT_COMPLEX_IM(y, ctx);
+
+    if (NFLOAT_IS_ZERO(b))
+    {
+        if (NFLOAT_IS_ZERO(d))
+            return nfloat_cmpabs(res, a, c, ctx);
+        if (NFLOAT_IS_ZERO(c))
+            return nfloat_cmpabs(res, a, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(a))
+    {
+        if (NFLOAT_IS_ZERO(d))
+            return nfloat_cmpabs(res, b, c, ctx);
+        if (NFLOAT_IS_ZERO(c))
+            return nfloat_cmpabs(res, b, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(c))
+    {
+        if (NFLOAT_IS_ZERO(a))
+            return nfloat_cmpabs(res, b, d, ctx);
+        if (NFLOAT_IS_ZERO(b))
+            return nfloat_cmpabs(res, a, d, ctx);
+    }
+
+    if (NFLOAT_IS_ZERO(d))
+    {
+        if (NFLOAT_IS_ZERO(a))
+            return nfloat_cmpabs(res, b, c, ctx);
+        if (NFLOAT_IS_ZERO(b))
+            return nfloat_cmpabs(res, a, c, ctx);
+    }
+
+    aexp = NFLOAT_EXP(a);
+    bexp = NFLOAT_EXP(b);
+    cexp = NFLOAT_EXP(c);
+    dexp = NFLOAT_EXP(d);
+
+    /* 0.5 * 2^xexp <= |x| < sqrt(2) * 2^xexp */
+    xexp = FLINT_MAX(aexp, bexp);
+    /* 0.5 * 2^yexp <= |y| < sqrt(2) * 2^yexp */
+    yexp = FLINT_MAX(cexp, dexp);
+
+    if (xexp + 2 < yexp)
+    {
+        *res = -1;
+        return GR_SUCCESS;
+    }
+
+    if (xexp > yexp + 2)
+    {
+        *res = 1;
+        return GR_SUCCESS;
+    }
+
+    exp = FLINT_MAX(xexp, yexp);
+
+    double tt, xx = 0.0, yy = 0.0;
+
+    if (aexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(a)[xn - 1], aexp - exp - FLINT_BITS);
+        xx += tt * tt;
+    }
 
-    return nfloat_cmpabs(res, NFLOAT_COMPLEX_RE(x, ctx), NFLOAT_COMPLEX_RE(y, ctx), ctx);
+    if (bexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(b)[xn - 1], bexp - exp - FLINT_BITS);
+        xx += tt * tt;
+    }
+
+    if (cexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(c)[xn - 1], cexp - exp - FLINT_BITS);
+        yy += tt * tt;
+    }
+
+    if (dexp >= exp - 53)
+    {
+        tt = d_mul_2exp_inrange(NFLOAT_D(d)[xn - 1], dexp - exp - FLINT_BITS);
+        yy += tt * tt;
+    }
+
+    if (xx < yy * 0.999999)
+    {
+        *res = -1;
+        return GR_SUCCESS;
+    }
+
+    if (xx * 0.999999 > yy)
+    {
+        *res = 1;
+        return GR_SUCCESS;
+    }
+
+    arf_struct s[5];
+
+    arf_init(s + 0);
+    arf_init(s + 1);
+    arf_init(s + 2);
+    arf_init(s + 3);
+    arf_init(s + 4);
+
+    nfloat_get_arf(s + 0, a, ctx);
+    nfloat_get_arf(s + 1, b, ctx);
+    nfloat_get_arf(s + 2, c, ctx);
+    nfloat_get_arf(s + 3, d, ctx);
+
+    arf_mul(s + 0, s + 0, s + 0, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 1, s + 1, s + 1, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 2, s + 2, s + 2, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_mul(s + 3, s + 3, s + 3, ARF_PREC_EXACT, ARF_RND_DOWN);
+    arf_neg(s + 2, s + 2);
+    arf_neg(s + 3, s + 3);
+    arf_sum(s + 4, s, 4, 30, ARF_RND_DOWN);
+    *res = arf_sgn(s + 4);
+
+    arf_clear(s + 0);
+    arf_clear(s + 1);
+    arf_clear(s + 2);
+    arf_clear(s + 3);
+    arf_clear(s + 4);
+
+    return GR_SUCCESS;
 }
 
 int
@@ -1794,6 +1924,9 @@ gr_method_tab_input _nfloat_complex_methods_input[] =
     {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_complex_poly_roots_other},
 */
     {GR_METHOD_MAT_MUL,         (gr_funcptr) nfloat_complex_mat_mul},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL,  (gr_funcptr) nfloat_complex_mat_nonsingular_solve_tril},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU,  (gr_funcptr) nfloat_complex_mat_nonsingular_solve_triu},
+    {GR_METHOD_MAT_LU,                      (gr_funcptr) nfloat_complex_mat_lu},
     {GR_METHOD_MAT_DET,         (gr_funcptr) gr_mat_det_generic_field},
     {GR_METHOD_MAT_FIND_NONZERO_PIVOT,     (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs},
 
diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c
index 6178b877cc..c73f1a1a1e 100644
--- a/src/nfloat/ctx.c
+++ b/src/nfloat/ctx.c
@@ -175,6 +175,9 @@ gr_method_tab_input _nfloat_methods_input[] =
     {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other},
 */
     {GR_METHOD_MAT_MUL,         (gr_funcptr) nfloat_mat_mul},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIL,  (gr_funcptr) nfloat_mat_nonsingular_solve_tril},
+    {GR_METHOD_MAT_NONSINGULAR_SOLVE_TRIU,  (gr_funcptr) nfloat_mat_nonsingular_solve_triu},
+    {GR_METHOD_MAT_LU,                      (gr_funcptr) nfloat_mat_lu},
     {GR_METHOD_MAT_DET,         (gr_funcptr) gr_mat_det_generic_field},
     {GR_METHOD_MAT_FIND_NONZERO_PIVOT,     (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs},
 
diff --git a/src/nfloat/mat.c b/src/nfloat/mat.c
new file mode 100644
index 0000000000..25d3ff271f
--- /dev/null
+++ b/src/nfloat/mat.c
@@ -0,0 +1,156 @@
+/*
+    Copyright (C) 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "gr.h"
+#include "nfloat.h"
+#include "gr_mat.h"
+
+int
+nfloat_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 256)
+        cutoff = 96;
+    else if (prec <= 512)
+        cutoff = 16;
+    else if (prec <= 576)
+        cutoff = 32;
+    else if (prec <= 1536)
+        cutoff = 8;
+    else if (prec <= 2176)
+        cutoff = 7;
+    else
+        cutoff = 6;
+
+    if (B->r < cutoff || B->c < cutoff)
+        return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx);
+    else
+        return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx);
+}
+
+int
+nfloat_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 256)
+        cutoff = 96;
+    else if (prec <= 512)
+        cutoff = 16;
+    else if (prec <= 576)
+        cutoff = 32;
+    else if (prec <= 1536)
+        cutoff = 8;
+    else if (prec <= 2176)
+        cutoff = 7;
+    else
+        cutoff = 6;
+
+    if (B->r < cutoff || B->c < cutoff)
+        return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx);
+    else
+        return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx);
+}
+
+int
+nfloat_complex_mat_nonsingular_solve_tril(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 192)
+        cutoff = 64;
+    else if (prec <= 256)
+        cutoff = 16;
+    else if (prec <= 384)
+        cutoff = 7;
+    else if (prec == 576)
+        cutoff = 16;
+    else
+        cutoff = 6;
+
+    if (B->r < cutoff || B->c < cutoff)
+        return gr_mat_nonsingular_solve_tril_classical(X, L, B, unit, ctx);
+    else
+        return gr_mat_nonsingular_solve_tril_recursive(X, L, B, unit, ctx);
+}
+
+int
+nfloat_complex_mat_nonsingular_solve_triu(gr_mat_t X, const gr_mat_t L,
+                                    const gr_mat_t B, int unit, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 192)
+        cutoff = 64;
+    else if (prec <= 256)
+        cutoff = 16;
+    else if (prec <= 384)
+        cutoff = 7;
+    else if (prec == 576)
+        cutoff = 16;
+    else
+        cutoff = 6;
+
+    if (B->r < cutoff || B->c < cutoff)
+        return gr_mat_nonsingular_solve_triu_classical(X, L, B, unit, ctx);
+    else
+        return gr_mat_nonsingular_solve_triu_recursive(X, L, B, unit, ctx);
+}
+
+int
+nfloat_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 256)
+        cutoff = 32;
+    else if (prec <= 576)
+        cutoff = 28;
+    else if (prec <= 768)
+        cutoff = 16;
+    else if (prec <= 1536)
+        cutoff = 12;
+    else if (prec <= 2560)
+        cutoff = 8;
+    else
+        cutoff = 7;
+
+    if (A->r < cutoff || A->c < cutoff)
+        return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx);
+    else
+        return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx);
+}
+
+int
+nfloat_complex_mat_lu(slong * rank, slong * P, gr_mat_t LU, const gr_mat_t A, int rank_check, gr_ctx_t ctx)
+{
+    slong cutoff, prec = NFLOAT_CTX_PREC(ctx);
+
+    if (prec <= 256)
+        cutoff = 12;
+    else if (prec <= 512)
+        cutoff = 8;
+    else if (prec <= 576)
+        cutoff = 16;
+    else if (prec <= 1024)
+        cutoff = 7;
+    else
+        cutoff = 6;
+
+    if (A->r < cutoff || A->c < cutoff)
+        return gr_mat_lu_classical(rank, P, LU, A, rank_check, ctx);
+    else
+        return gr_mat_lu_recursive(rank, P, LU, A, rank_check, ctx);
+}

From 99bd8468fb1a2bf3af0909bc009632e1c60deea7 Mon Sep 17 00:00:00 2001
From: Fredrik Johansson <fredrik.johansson@gmail.com>
Date: Sat, 20 Jul 2024 15:32:59 +0200
Subject: [PATCH 3/3] fixup logic

---
 src/gr/acf.c         | 21 ++++++++-------------
 src/nfloat/complex.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/gr/acf.c b/src/gr/acf.c
index dc5be3b228..8d0f7c08fe 100644
--- a/src/gr/acf.c
+++ b/src/gr/acf.c
@@ -686,6 +686,11 @@ _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx)
             return _gr_arf_cmpabs(res, a, c, ctx);
         if (arf_is_zero(c))
             return _gr_arf_cmpabs(res, a, d, ctx);
+        if (arf_is_zero(a))
+        {
+            *res = -1;
+            return GR_SUCCESS;
+        }
     }
 
     if (arf_is_zero(a))
@@ -696,20 +701,10 @@ _gr_acf_cmpabs(int * res, const acf_t x, const acf_t y, const gr_ctx_t ctx)
             return _gr_arf_cmpabs(res, b, d, ctx);
     }
 
-    if (arf_is_zero(c))
+    if (arf_is_zero(c) && arf_is_zero(d))
     {
-        if (arf_is_zero(a))
-            return _gr_arf_cmpabs(res, b, d, ctx);
-        if (arf_is_zero(b))
-            return _gr_arf_cmpabs(res, a, d, ctx);
-    }
-
-    if (arf_is_zero(d))
-    {
-        if (arf_is_zero(a))
-            return _gr_arf_cmpabs(res, b, c, ctx);
-        if (arf_is_zero(b))
-            return _gr_arf_cmpabs(res, a, c, ctx);
+        *res = 1;
+        return GR_SUCCESS;
     }
 
     if (ARF_IS_LAGOM(a) && ARF_IS_LAGOM(b) && ARF_IS_LAGOM(c) && ARF_IS_LAGOM(d))
diff --git a/src/nfloat/complex.c b/src/nfloat/complex.c
index 8b38bd81cb..e9fff75bbd 100644
--- a/src/nfloat/complex.c
+++ b/src/nfloat/complex.c
@@ -1566,6 +1566,11 @@ nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr
             return nfloat_cmpabs(res, a, c, ctx);
         if (NFLOAT_IS_ZERO(c))
             return nfloat_cmpabs(res, a, d, ctx);
+        if (NFLOAT_IS_ZERO(a))
+        {
+            *res = -1;
+            return GR_SUCCESS;
+        }
     }
 
     if (NFLOAT_IS_ZERO(a))
@@ -1576,20 +1581,10 @@ nfloat_complex_cmpabs(int * res, nfloat_complex_srcptr x, nfloat_complex_srcptr
             return nfloat_cmpabs(res, b, d, ctx);
     }
 
-    if (NFLOAT_IS_ZERO(c))
-    {
-        if (NFLOAT_IS_ZERO(a))
-            return nfloat_cmpabs(res, b, d, ctx);
-        if (NFLOAT_IS_ZERO(b))
-            return nfloat_cmpabs(res, a, d, ctx);
-    }
-
-    if (NFLOAT_IS_ZERO(d))
+    if (NFLOAT_IS_ZERO(c) && NFLOAT_IS_ZERO(d))
     {
-        if (NFLOAT_IS_ZERO(a))
-            return nfloat_cmpabs(res, b, c, ctx);
-        if (NFLOAT_IS_ZERO(b))
-            return nfloat_cmpabs(res, a, c, ctx);
+        *res = 1;
+        return GR_SUCCESS;
     }
 
     aexp = NFLOAT_EXP(a);