Skip to content

Commit

Permalink
Merge pull request #170 from munroesj52/stable/1.0.4-4
Browse files Browse the repository at this point in the history
Stable/1.0.4 4
  • Loading branch information
munroesj52 authored Sep 16, 2022
2 parents 6075912 + c661e85 commit e3ecd3b
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 58 deletions.
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -13430,7 +13430,7 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
ac_compiler_gnu=$ac_cv_c_compiler_gnu



# remove AC_PROG_LIBTOOL for autotools 2.71

# This directive is to avoid buggy libtool that doesn't add the '-Wl,--no-as-needed'
# directive in the correct position of LDFLAGS
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PVECLIB_SO_VERSION=1:4:0
AC_SUBST(PVECLIB_SO_VERSION)

AC_PROG_CC
LT_INIT
# remove AC_PROG_LIBTOOL for autotools 2.71

# This directive is to avoid buggy libtool that doesn't add the '-Wl,--no-as-needed'
# directive in the correct position of LDFLAGS
Expand Down
23 changes: 21 additions & 2 deletions src/pveclib/vec_f128_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,31 @@ test_cosf128 (__binary128 value)
-mcpu=power9 and -mfloat128.
So far clang does not support/define the __ibm128 type. */
#ifdef __FLOAT128__
typedef __float128 __Float128;
#ifndef __clang__
// For now assume that not __clang__ implies GCC.
// Can't just use #ifdef __GNUC__ because Clang defines it too.
#ifdef __float128
// Can assume GCC 7 or later so ...
// That version defines __ieee128 internally and
// #defines __float128 to __ieee128, so both are defined
// Define __binary128 so both GCC and Clang can use a single type
#define __binary128 __ieee128
#else
// Assume GCC 6 or earlier
// So the compiler defines __float128 only
typedef __float128 __binary128;
typedef __float128 __ieee128;
#ifndef __clang__
#endif
#if (__GNUC__ < 7)
typedef __float128 _Float128;
#endif
typedef __ibm128 __IBM128;
#else
/* Clang started defining __FLOAT128__ and does not allow redefining
__float128 or __ieee128. Worse, it will give errors if you try to
use either type. So define __binary128 as if __FLOAT128__ is not
defined. */
typedef vui128_t __binary128;
/* Clang does not define __ibm128 over IBM long double.
So define it here. */
typedef long double __IBM128;
Expand Down
37 changes: 29 additions & 8 deletions src/pveclib/vec_f32_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -774,31 +774,52 @@ vec_any_iszerof32 (vf32_t vf32)
#endif
}

/** \brief Copy the sign bit from vf32y merged with magnitude from
* vf32x and return the resulting vector float values.
/** \brief Copy the sign bit from vf32x merged with magnitude from
* vf32y and return the resulting vector float values.
*
* \note This operation was patterned after the intrinsic vec_cpsgn
* (altivec.h) introduced for POWER7 and VSX. It turns out the
* original (GCC 4.9) compiler implementation reversed the operands
* and does not match the PowerISA or the Vector Intrinsic Programming
* Reference manuals. Subsequent compilers and PVECLIB
* implementations replicated this (operand order) error.
* This has now been reported as a bug against the compilers, which are
* in the process of applying fixes and distributing updates.
* This version of PVECLIB is updated to match the Vector Intrinsic
* Programming Reference. This implementation is independent of the
* compilers' update status.
*
* |processor|Latency|Throughput|
* |--------:|:-----:|:---------|
* |power8 | 6-7 | 2/cycle |
* |power9 | 2 | 2/cycle |
*
* @param vf32x vector float values containing the magnitudes.
* @param vf32y vector float values containing the sign bits.
* @return vector float values with magnitude from vf32x and the
* sign of vf32y.
* @param vf32x vector float values containing the sign bits.
* @param vf32y vector float values containing the magnitudes.
* @return vector float values with magnitude from vf32y and the
* sign of vf32x.
*/
static inline vf32_t
vec_copysignf32 (vf32_t vf32x, vf32_t vf32y)
{
#if _ARCH_PWR7
/* P9 has a 2 cycle xvcpsgnsp and eliminates a const load. */
#ifdef PVECLIB_CPSGN_FIXED
return (vec_cpsgn (vf32x, vf32y));
#else
vf32_t result;
__asm__(
"xvcpsgnsp %x0,%x1,%x2;\n"
: "=wa" (result)
: "wa" (vf32x), "wa" (vf32y)
:);
return (result);
#endif
#else
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000,
0x80000000, 0x80000000);
vf32_t result;

result = (vf32_t)vec_sel ((vui32_t)vf32x, (vui32_t)vf32y, signmask);
result = (vf32_t)vec_sel ((vui32_t)vf32y, (vui32_t)vf32x, signmask);
return (result);
#endif
}
Expand Down
46 changes: 34 additions & 12 deletions src/pveclib/vec_f64_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -773,31 +773,53 @@ vec_any_iszerof64 (vf64_t vf64)
#endif
}

/** \brief Copy the sign bit from vf64y merged with magnitude from
* vf64x and return the resulting vector double values.
/** \brief Copy the sign bit from vf64x merged with magnitude from
* vf64y and return the resulting vector double values.
*
* \note This operation was patterned after the intrinsic vec_cpsgn
* (altivec.h) introduced for POWER7 and VSX. It turns out the
* original (GCC 4.9) compiler implementation reversed the operands
* and does not match the PowerISA or the Vector Intrinsic Programming
* Reference manuals. Subsequent compilers and PVECLIB
* implementations replicated this (operand order) error.
* This has now been reported as a bug against the compilers, which are
* in the process of applying fixes and distributing updates.
* This version of PVECLIB is updated to match the Vector Intrinsic
* Programming Reference. This implementation is independent of the
* compilers' update status.
*
* |processor|Latency|Throughput|
* |--------:|:-----:|:---------|
* |power8 | 6-7 | 2/cycle |
* |power9 | 2 | 2/cycle |
*
* @param vf64x vector double values containing the magnitudes.
* @param vf64y vector double values containing the sign bits.
* @return vector double values with magnitude from vf64x and the
* sign of vf64y.
* @param vf64x vector double values containing the sign bits.
* @param vf64y vector double values containing the magnitudes.
* @return vector double values with magnitude from vf64y and the
* sign of vf64x.
*/
static inline vf64_t
vec_copysignf64 (vf64_t vf64x , vf64_t vf64y)
vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
{
#if _ARCH_PWR7
/* P9 has a 2 cycle xvcpsgndp and eliminates a const load. */
return (vec_cpsgn (vf64x, vf64y));
#ifdef PVECLIB_CPSGN_FIXED
return (vec_cpsgn (vf64x, vf64y));
#else
vf64_t result;
__asm__(
"xvcpsgndp %x0,%x1,%x2;\n"
: "=wa" (result)
: "wa" (vf64x), "wa" (vf64y)
:);
return (result);
#endif
#else
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
vf64_t result;
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
vf64_t result;

result = (vf64_t)vec_sel ((vui32_t)vf64x, (vui32_t)vf64y, signmask);
return (result);
result = (vf64_t) vec_sel ((vui32_t) vf64y, (vui32_t) vf64x, signmask);
return (result);
#endif
}

Expand Down
14 changes: 7 additions & 7 deletions src/testsuite/arith128_test_f32.c
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ test_float_cpsgn (void)

i = (vf32_t) { 0.0, -0.0, 0.0, -0.0 };
j = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
e = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
e = (vf32_t) { 0.0, -0.0, 0.0, -0.0 };
k = vec_copysignf32 (i, j);

#ifdef __DEBUG_PRINT__
Expand All @@ -1196,9 +1196,9 @@ test_float_cpsgn (void)
#endif
rc += check_v4f32x ("vec_copysignf32 1:", k, e);

i = (vf32_t) { __FLT_MAX__, __FLT_MIN__, __FLT_EPSILON__,
i = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
j = (vf32_t) { __FLT_MAX__, __FLT_MIN__, __FLT_EPSILON__,
__FLT_DENORM_MIN__ };
j = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
e = (vf32_t) { -(__FLT_MAX__), __FLT_MIN__, -(__FLT_EPSILON__),
__FLT_DENORM_MIN__ };
k = vec_copysignf32 (i, j);
Expand All @@ -1210,9 +1210,9 @@ test_float_cpsgn (void)
#endif
rc += check_v4f32x ("vec_copysignf32 2:", k, e);

i = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
i = (vf32_t) CONST_VINT32_W(0.0, -0.0, 0.0, -0.0);
j = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
__FLOAT_NINF);
j = (vf32_t) CONST_VINT32_W(0.0, -0.0, 0.0, -0.0);
e = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
__FLOAT_NINF);
k = vec_copysignf32 (i, j);
Expand All @@ -1224,9 +1224,9 @@ test_float_cpsgn (void)
#endif
rc += check_v4f32x ("vec_copysignf32 3:", k, e);

i = (vf32_t) CONST_VINT128_W(__FLOAT_NAN, __FLOAT_NNAN, __FLOAT_NSNAN,
i = (vf32_t) {-0.0, 0.0, 0.0, -0.0 };
j = (vf32_t) CONST_VINT128_W(__FLOAT_NAN, __FLOAT_NNAN, __FLOAT_NSNAN,
__FLOAT_SNAN);
j = (vf32_t) {-0.0, 0.0, 0.0, -0.0 };
e = (vf32_t) CONST_VINT128_W(__FLOAT_NNAN, __FLOAT_NAN, __FLOAT_SNAN,
__FLOAT_NSNAN);
k = vec_copysignf32 (i, j);
Expand Down
26 changes: 13 additions & 13 deletions src/testsuite/arith128_test_f64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@ test_double_cpsgn (void)

i = (vf64_t) { 0.0, -0.0 };
j = (vf64_t) { -0.0, 0.0 };
e = (vf64_t) { -0.0, 0.0 };
e = (vf64_t) { 0.0, -0.0 };
k = vec_copysignf64 (i, j);

#ifdef __DEBUG_PRINT__
Expand All @@ -1606,8 +1606,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 1:", k, e);

i = (vf64_t) { __DBL_MAX__, __DBL_MIN__ };
j = (vf64_t) { -0.0, 0.0 };
i = (vf64_t) { -0.0, 0.0 };
j = (vf64_t) { __DBL_MAX__, __DBL_MIN__ };
e = (vf64_t) { -(__DBL_MAX__), __DBL_MIN__ };
k = vec_copysignf64 (i, j);

Expand All @@ -1618,8 +1618,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 2:", k, e);

i = (vf64_t) { __DBL_EPSILON__, __DBL_DENORM_MIN__ };
j = (vf64_t) { -0.0, 0.0 };
i = (vf64_t) { -0.0, 0.0 };
j = (vf64_t) { __DBL_EPSILON__, __DBL_DENORM_MIN__ };
e = (vf64_t) { -(__DBL_EPSILON__), __DBL_DENORM_MIN__ };
k = vec_copysignf64 (i, j);

Expand All @@ -1630,8 +1630,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 3:", k, e);

i = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
j = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
i = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
j = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
e = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
k = vec_copysignf64 (i, j);

Expand All @@ -1642,8 +1642,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 4:", k, e);

i = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
j = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
i = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
j = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
e = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
k = vec_copysignf64 (i, j);

Expand All @@ -1654,8 +1654,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 5:", k, e);

i = (vf64_t) CONST_VINT128_DW(__DOUBLE_NAN, __DOUBLE_NNAN);
j = (vf64_t) CONST_VINT64_DW( -0.0, 0.0 );
i = (vf64_t) CONST_VINT64_DW( -0.0, 0.0 );
j = (vf64_t) CONST_VINT128_DW(__DOUBLE_NAN, __DOUBLE_NNAN);
e = (vf64_t) CONST_VINT128_DW(__DOUBLE_NNAN, __DOUBLE_NAN);
k = vec_copysignf64 (i, j);

Expand All @@ -1666,8 +1666,8 @@ test_double_cpsgn (void)
#endif
rc += check_v2f64x ("vec_copysignf64 6:", k, e);

i = (vf64_t) CONST_VINT128_DW(__DOUBLE_NSNAN, __DOUBLE_SNAN);
j = (vf64_t) CONST_VINT64_DW ( 0.0, -0.0 );
i = (vf64_t) CONST_VINT64_DW ( 0.0, -0.0 );
j = (vf64_t) CONST_VINT128_DW(__DOUBLE_NSNAN, __DOUBLE_SNAN);
e = (vf64_t) CONST_VINT128_DW(__DOUBLE_SNAN, __DOUBLE_NSNAN);
k = vec_copysignf64 (i, j);

Expand Down
6 changes: 4 additions & 2 deletions src/testsuite/arith128_test_i128.c
Original file line number Diff line number Diff line change
Expand Up @@ -2308,7 +2308,8 @@ test_msumudm (void)
int
test_muludq (void)
{
vui32_t i, j, k, l /*, m*/;
vui32_t i, j, k/*, l , m*/;
vui128_t l;
vui32_t e, ec;
int rc = 0;

Expand Down Expand Up @@ -2383,7 +2384,8 @@ test_muludq (void)
int
test_madduq (void)
{
vui32_t i, j, k, l, m, n;
vui32_t i, j, k, m, n;
vui128_t l;
vui32_t e, ec;
int rc = 0;

Expand Down
Loading

0 comments on commit e3ecd3b

Please sign in to comment.