Skip to content

Commit

Permalink
Merge pull request #1967 from flintlib/nfloat2
Browse files Browse the repository at this point in the history
Some nfloat optimisations and vector functions
  • Loading branch information
fredrik-johansson authored May 11, 2024
2 parents f09c577 + e8afcfb commit 80cd37d
Show file tree
Hide file tree
Showing 6 changed files with 827 additions and 26 deletions.
10 changes: 10 additions & 0 deletions doc/source/nfloat.rst
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,16 @@ Vector functions
Overrides for generic ``gr`` vector operations with inlined or partially inlined
code for reduced overhead.

.. function:: void _nfloat_vec_init(nfloat_ptr res, slong len, gr_ctx_t ctx)
void _nfloat_vec_clear(nfloat_ptr res, slong len, gr_ctx_t ctx)
int _nfloat_vec_set(nfloat_ptr res, nfloat_srcptr x, slong len, gr_ctx_t ctx)
int _nfloat_vec_zero(nfloat_ptr res, slong len, gr_ctx_t ctx)

.. function:: int _nfloat_vec_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
int _nfloat_vec_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
int _nfloat_vec_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
int _nfloat_vec_mul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat_srcptr y, gr_ctx_t ctx)

.. function:: int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)

Expand Down
90 changes: 90 additions & 0 deletions src/gr/test_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -3247,6 +3247,13 @@ gr_test_vec_binary_op(gr_ctx_t R, const char * opname, int (*gr_op)(gr_ptr, gr_s

aliasing = n_randint(state, 4);

/* Don't test x * x == x^2 for inexact "rings" (e.g. floats) where
the squaring algorithm might not produce exactly the same result. */
if ((aliasing == 2 || aliasing == 3) && gr_ctx_is_ring(R) == T_FALSE && gr_ctx_is_exact(R) == T_FALSE)
{
aliasing = 4;
}

switch (aliasing)
{
case 0:
Expand All @@ -3258,6 +3265,10 @@ gr_test_vec_binary_op(gr_ctx_t R, const char * opname, int (*gr_op)(gr_ptr, gr_s
status |= _gr_vec_op(xy1, x, xy1, len, R);
break;
case 2:
status |= _gr_vec_set(y, x, len, R);
status |= _gr_vec_op(xy1, x, x, len, R);
break;
case 3:
status |= _gr_vec_set(y, x, len, R);
status |= _gr_vec_set(xy1, x, len, R);
status |= _gr_vec_op(xy1, xy1, xy1, len, R);
Expand All @@ -3284,6 +3295,8 @@ gr_test_vec_binary_op(gr_ctx_t R, const char * opname, int (*gr_op)(gr_ptr, gr_s
flint_printf("%s\n", opname);
gr_ctx_println(R);
flint_printf("aliasing: %d\n", aliasing);
_gr_vec_print(x, len, R); flint_printf("\n");
_gr_vec_print(y, len, R); flint_printf("\n");
_gr_vec_print(xy1, len, R); flint_printf("\n");
_gr_vec_print(xy2, len, R); flint_printf("\n");
}
Expand All @@ -3303,6 +3316,75 @@ int gr_test_vec_div(gr_ctx_t R, flint_rand_t state, int test_flags) { return gr_
/* Runs gr_test_vec_binary_op with gr_divexact as the element operation and
   _gr_vec_divexact as the vector operation, labelled "vec_divexact". */
int
gr_test_vec_divexact(gr_ctx_t R, flint_rand_t state, int test_flags)
{
    return gr_test_vec_binary_op(R, "vec_divexact", gr_divexact,
                                 _gr_vec_divexact, state, test_flags);
}
/* Runs gr_test_vec_binary_op with gr_pow as the element operation and
   _gr_vec_pow as the vector operation, labelled "vec_pow". */
int
gr_test_vec_pow(gr_ctx_t R, flint_rand_t state, int test_flags)
{
    return gr_test_vec_binary_op(R, "vec_pow", gr_pow,
                                 _gr_vec_pow, state, test_flags);
}

/*
    Randomised consistency check for a "vector (op) scalar" method.

    A vector of length n_randint(state, 5) and a single scalar element are
    generated with _gr_vec_randtest. The vector method _gr_vec_op is applied
    to them (writing over a copy of the input vector when the randomly drawn
    aliasing flag is nonzero), and the result is compared with the one
    obtained by calling gr_op on each entry in turn.

    Returns the OR of all status flags collected along the way. The status
    is replaced by GR_TEST_FAIL when every call succeeded but _gr_vec_equal
    reports T_FALSE, or when test_flags contains GR_TEST_ALWAYS_ABLE and
    GR_UNABLE was raised. The operands and both results are printed when the
    test fails or when test_flags contains GR_TEST_VERBOSE.
*/
int
gr_test_vec_binary_op_scalar(gr_ctx_t R, const char * opname, int (*gr_op)(gr_ptr, gr_srcptr, gr_srcptr, gr_ctx_t),
    int (*_gr_vec_op)(gr_ptr, gr_srcptr, slong, gr_srcptr, gr_ctx_t), flint_rand_t state, int test_flags)
{
    gr_ptr src, scalar, got, want;
    slong len, k;
    int in_place;
    int status = GR_SUCCESS;

    len = n_randint(state, 5);

    GR_TMP_INIT_VEC(src, len, R);
    GR_TMP_INIT_VEC(scalar, 1, R);
    GR_TMP_INIT_VEC(got, len, R);
    GR_TMP_INIT_VEC(want, len, R);

    /* The outputs are randomised too, so stale contents cannot pass by accident. */
    GR_MUST_SUCCEED(_gr_vec_randtest(src, state, len, R));
    GR_MUST_SUCCEED(_gr_vec_randtest(scalar, state, 1, R));
    GR_MUST_SUCCEED(_gr_vec_randtest(got, state, len, R));
    GR_MUST_SUCCEED(_gr_vec_randtest(want, state, len, R));

    in_place = n_randint(state, 2);

    if (!in_place)
    {
        status |= _gr_vec_op(got, src, len, scalar, R);
    }
    else
    {
        /* Aliased call: the output vector doubles as the input vector. */
        status |= _gr_vec_set(got, src, len, R);
        status |= _gr_vec_op(got, got, len, scalar, R);
    }

    /* Reference result, one entry at a time. */
    for (k = 0; k < len; k++)
    {
        status |= gr_op(GR_ENTRY(want, k, R->sizeof_elem),
                        GR_ENTRY(src, k, R->sizeof_elem),
                        scalar, R);
    }

    if (status == GR_SUCCESS)
    {
        if (_gr_vec_equal(got, want, len, R) == T_FALSE)
            status = GR_TEST_FAIL;
    }

    if ((status & GR_UNABLE) && (test_flags & GR_TEST_ALWAYS_ABLE))
    {
        status = GR_TEST_FAIL;
    }

    if (status == GR_TEST_FAIL || (test_flags & GR_TEST_VERBOSE))
    {
        flint_printf("%s\n", opname);
        gr_ctx_println(R);
        flint_printf("aliasing: %d\n", in_place);
        _gr_vec_print(src, len, R);
        flint_printf("\n");
        _gr_vec_print(scalar, 1, R);
        flint_printf("\n");
        _gr_vec_print(got, len, R);
        flint_printf("\n");
        _gr_vec_print(want, len, R);
        flint_printf("\n");
    }

    GR_TMP_CLEAR_VEC(src, len, R);
    GR_TMP_CLEAR_VEC(scalar, 1, R);
    GR_TMP_CLEAR_VEC(got, len, R);
    GR_TMP_CLEAR_VEC(want, len, R);

    return status;
}

/* Runs gr_test_vec_binary_op_scalar with gr_mul as the element operation and
   _gr_vec_mul_scalar as the vector operation, labelled "vec_mul_scalar". */
int
gr_test_vec_mul_scalar(gr_ctx_t R, flint_rand_t state, int test_flags)
{
    return gr_test_vec_binary_op_scalar(R, "vec_mul_scalar", gr_mul,
                                        _gr_vec_mul_scalar, state, test_flags);
}

int gr_generic_vec_dot(gr_ptr res, gr_srcptr initial, int subtract, gr_srcptr vec1, gr_srcptr vec2, slong len, gr_ctx_t ctx);

int
Expand Down Expand Up @@ -3740,6 +3822,8 @@ gr_test_ring(gr_ctx_t R, slong iters, int test_flags)
gr_test_iter(R, state, "vec_divexact", gr_test_vec_divexact, vec_iters, test_flags);
/* gr_test_iter(R, state, "vec_pow", gr_test_vec_pow, vec_iters, test_flags & (~GR_TEST_ALWAYS_ABLE)); large elements */

gr_test_iter(R, state, "vec_mul_scalar", gr_test_vec_mul_scalar, vec_iters, test_flags);

gr_test_iter(R, state, "vec_dot", gr_test_vec_dot, iters, test_flags);

gr_test_iter(R, state, "mat_mul_classical: associative", gr_test_mat_mul_classical_associative, iters, test_flags);
Expand Down Expand Up @@ -3824,6 +3908,7 @@ gr_test_floating_point(gr_ctx_t R, slong iters, int test_flags)
{
timeit_t timer;
flint_rand_t state;
slong vec_iters = iters / 10 + 1;

/* test_flags |= GR_TEST_VERBOSE; */

Expand Down Expand Up @@ -3859,6 +3944,11 @@ gr_test_floating_point(gr_ctx_t R, slong iters, int test_flags)
gr_test_iter(R, state, "div: aliasing", gr_test_div_aliasing, iters, test_flags);
gr_test_iter(R, state, "pow: aliasing", gr_test_pow_aliasing, iters, test_flags);

gr_test_iter(R, state, "vec_add", gr_test_vec_add, vec_iters, test_flags);
gr_test_iter(R, state, "vec_sub", gr_test_vec_sub, vec_iters, test_flags);
gr_test_iter(R, state, "vec_mul", gr_test_vec_mul, vec_iters, test_flags);
gr_test_iter(R, state, "vec_mul_scalar", gr_test_vec_mul_scalar, vec_iters, test_flags);

flint_randclear(state);

if (test_flags & GR_TEST_VERBOSE)
Expand Down
14 changes: 12 additions & 2 deletions src/nfloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,13 @@ int _nfloat_overflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx);

/* Underflow guard: when the exponent of res is below NFLOAT_MIN_EXP, return
   from the *enclosing function* with the value of _nfloat_underflow().
   The test is wrapped in FLINT_UNLIKELY (presumably a branch-prediction
   hint). Because the expansion contains a return statement, the macro can
   only be used where returning that value is valid. */
#define NFLOAT_HANDLE_UNDERFLOW(res, ctx) \
    do { \
        if (FLINT_UNLIKELY(NFLOAT_EXP(res) < NFLOAT_MIN_EXP)) \
        { \
            return _nfloat_underflow(res, NFLOAT_SGNBIT(res), ctx); \
        } \
    } while (0)

/* Overflow guard: when the exponent of res exceeds NFLOAT_MAX_EXP, return
   from the *enclosing function* with the value of _nfloat_overflow().
   The previous text of this macro was a copy of the underflow guard (it
   compared against NFLOAT_MIN_EXP and called _nfloat_underflow), so results
   with a too-large exponent were never caught.
   The test is wrapped in FLINT_UNLIKELY (presumably a branch-prediction
   hint). Because the expansion contains a return statement, the macro can
   only be used where returning that value is valid. */
#define NFLOAT_HANDLE_OVERFLOW(res, ctx) \
    do { \
        if (FLINT_UNLIKELY(NFLOAT_EXP(res) > NFLOAT_MAX_EXP)) \
            return _nfloat_overflow(res, NFLOAT_SGNBIT(res), ctx); \
    } while (0)

Expand Down Expand Up @@ -329,6 +329,16 @@ int nfloat_atan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx);
int nfloat_gamma(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx);
int nfloat_zeta(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx);

void _nfloat_vec_init(nfloat_ptr res, slong len, gr_ctx_t ctx);
void _nfloat_vec_clear(nfloat_ptr res, slong len, gr_ctx_t ctx);
int _nfloat_vec_set(nfloat_ptr res, nfloat_srcptr x, slong len, gr_ctx_t ctx);
int _nfloat_vec_zero(nfloat_ptr res, slong len, gr_ctx_t ctx);

int _nfloat_vec_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
int _nfloat_vec_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
int _nfloat_vec_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
int _nfloat_vec_mul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat_srcptr y, gr_ctx_t ctx);

int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);

Expand Down
8 changes: 6 additions & 2 deletions src/nfloat/ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,14 @@ gr_method_tab_input _nfloat_methods_input[] =
{GR_METHOD_GAMMA, (gr_funcptr) nfloat_gamma},
{GR_METHOD_ZETA, (gr_funcptr) nfloat_zeta},

/*
{GR_METHOD_VEC_INIT, (gr_funcptr) _nfloat_vec_init},
{GR_METHOD_VEC_CLEAR, (gr_funcptr) _nfloat_vec_clear},
{GR_METHOD_VEC_SET, (gr_funcptr) _nfloat_vec_set},
{GR_METHOD_VEC_ZERO, (gr_funcptr) _nfloat_vec_zero},
{GR_METHOD_VEC_ADD, (gr_funcptr) _nfloat_vec_add},
{GR_METHOD_VEC_SUB, (gr_funcptr) _nfloat_vec_sub},
*/
{GR_METHOD_VEC_MUL, (gr_funcptr) _nfloat_vec_mul},
{GR_METHOD_VEC_MUL_SCALAR, (gr_funcptr) _nfloat_vec_mul_scalar},
{GR_METHOD_VEC_DOT, (gr_funcptr) _nfloat_vec_dot},
{GR_METHOD_VEC_DOT_REV, (gr_funcptr) _nfloat_vec_dot_rev},
/*
Expand Down
Loading

0 comments on commit 80cd37d

Please sign in to comment.