From b2f469920306a651861bf891aefb55d1fa896e29 Mon Sep 17 00:00:00 2001 From: Naoki Shibata Date: Tue, 1 Sep 2020 10:51:12 +0900 Subject: [PATCH] no message (#331) Co-authored-by: shibatch --- CHANGELOG.md | 112 +- Configure.cmake | 6 + doc/html/CMakeLists.txt | 6 +- doc/html/aarch32.xhtml | 26 +- doc/html/aarch64.xhtml | 48 +- doc/html/additional.xhtml | 271 ++- doc/html/benchmark.xhtml | 2 +- doc/html/compile.xhtml | 316 ++-- doc/html/dft.xhtml | 2 +- doc/html/index.xhtml | 223 ++- doc/html/misc.xhtml | 76 +- doc/html/ph.c | 115 ++ doc/html/ppc64.xhtml | 46 +- doc/html/purec.xhtml | 84 +- doc/html/s390x.xhtml | 3586 ++++++++++++++++++++++++++++++++++++ doc/html/x86.xhtml | 66 +- src/gencoef/Makefile | 10 +- src/gencoef/mkrempitabqp.c | 63 + 18 files changed, 4669 insertions(+), 389 deletions(-) create mode 100644 doc/html/ph.c create mode 100644 doc/html/s390x.xhtml create mode 100644 src/gencoef/mkrempitabqp.c diff --git a/CHANGELOG.md b/CHANGELOG.md index 86ec2d03..44a21ef4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ -# Changelog -All notable changes to this project will be documented in this file. +## 3.5 - 2020-09-01 +- IBM System/390 support is added. +- The library can be built with Clang on Windows. +- Static libraries with LTO can be generated. +- Alternative division and sqrt methods can be chosen with AArch64. +- Header files for inlining the whole SLEEF functions can be generated. +- IEEE remainder function is added. +- GCC-10 can now build SLEEF with SVE support. -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). - -## Next release ## 3.4.1 - 2019-10-01 ### Changed - Fixed accuracy problem with tan_u35, atan_u10, log2f_u35 and exp10f_u10. @@ -34,6 +36,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Changed - Many functions are now faster - Testers are now faster + ## 3.3.1 - 2018-08-20 ### Added - FreeBSD support is added @@ -42,6 +45,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Trigonometric functions now evaluate correctly with full FP domain. https://github.com/shibatch/sleef/pull/210 + ## 3.3 - 2018-07-06 ### Added - SVE target support is added to libsleef. @@ -57,6 +61,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. https://github.com/shibatch/sleef/pull/195 - Payne-Hanek like argument reduction is added to libsleef. https://github.com/shibatch/sleef/pull/197 + ## 3.2 - 2018-02-26 ### Added - The whole build system of the project migrated from makefiles to @@ -89,3 +94,98 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Removed - Makefile build system + +## 3.1 - 2017-07-19 +- Added AArch64 support +- Implemented the remaining C99 math functions : lgamma, tgamma, + erf, erfc, fabs, copysign, fmax, fmin, fdim, trunc, floor, ceil, + round, rint, modf, ldexp, nextafter, frexp, hypot, and fmod. +- Added dispatcher for x86 functions +- Improved reduction of trigonometric functions +- Added support for 32-bit x86, Cygwin, etc. 
+- Improved tester + +## 3.0 - 2017-02-07 +- New API is defined +- Functions for DFT are added +- sincospi functions are added +- gencoef now supports single, extended and quad precision in addition to double precision +- Linux, Windows and Mac OS X are supported +- GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported +- The library can be compiled as DLLs +- Files needed for creating a debian package are now included + +## 2.120 - 2017-01-30 +- Relicensed to Boost Software License Version 1.0 + +## 2.110 - 2016-12-11 +- The valid range of argument is extended for trig functions +- Specification of each functions regarding to the domain and accuracy is added +- A coefficient generation tool is added +- New testing tools are introduced +- Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh +- SIMD xsin and xcos returned values more than 1 when FMA is enabled +- Pure C cbrt returned incorrect values when the argument is negative +- tan_u1 returned values with more than 1 ulp of error on rare occasions +- Removed support for Java language(because no one seems using this) + +## 2.100 - 2016-12-04 +- Added support for AVX-512F and Clang Extended Vectors. + +## 2.90 - 2016-11-27 +- Added ilogbf. All the reported bugs(listed below) are fixed. +- Log function returned incorrect values when the argument is very small. +- Signs of returned values were incorrect when the argument is signed zero. +- Tester incorrectly counted ULP in some cases. +- ilogb function returned incorrect values in some cases. + +## 2.80 - 2013-05-18 +- Added support for ARM NEON. Added higher accuracy single + precision functions : sinf_u1, cosf_u1, sincosf_u1, tanf_u1, asinf_u1, + acosf_u1, atanf_u1, atan2f_u1, logf_u1, and cbrtf_u1. + +## 2.70 - 2013-04-30 +- Added higher accuracy functions : sin_u1, cos_u1, sincos_u1, + tan_u1, asin_u1, acos_u1, atan_u1, atan2_u1, log_u1, and + cbrt_u1. 
These functions evaluate the corresponding function with at + most 1 ulp of error. + +## 2.60 - 2013-03-26 +- Added the remaining single precision functions : powf, sinhf, + coshf, tanhf, exp2f, exp10f, log10f, log1pf. Added support for FMA4 + (for AMD Bulldozer). Added more test cases. Fixed minor bugs (which + degraded accuracy in some rare cases). + +## 2.50 - 2013-03-12 +- Added support for AVX2. SLEEF now compiles with ICC. + +## 2.40 - 2013-03-07 +- Fixed incorrect denormal/nonnumber handling in ldexp, ldexpf, + sinf and cosf. Removed support for Go language. + +## 2.31 - 2012-07-05 +- Added sincosf. + +## 2.30 - 2012-01-20 +- Added single precision functions : sinf, cosf, tanf, asinf, + acosf, atanf, logf, expf, atan2f and cbrtf. + +## 2.20 - 2012-01-09 +- Added exp2, exp10, expm1, log10, log1p, and cbrt. + +## 2.10 - 2012-01-05 +- asin() and acos() are back. +- Added ilogb() and ldexp(). +- Added hyperbolic functions. +- Eliminated dependency on frexp, ldexp, fabs, isnan and isinf. + +## 2.00 - 2011-12-30 +- All of the algorithm has been updated. +- Both accuracy and speed are improved since version 1.10. +- Denormal number handling is also improved. + +## 1.10 - 2010-06-22 +- AVX support is added. Accuracy tester is added. 
+ +## 1.00 - 2010-05-15 +- Initial release diff --git a/Configure.cmake b/Configure.cmake index 3507bc54..7e00be45 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -822,7 +822,13 @@ find_program(SED_COMMAND sed) if(SLEEF_SHOW_ERROR_LOG) if (EXISTS ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log) file(READ ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log FILE_CONTENT) + message("") + message("===== Content of CMakeError.log =====") + message("") message("${FILE_CONTENT}") + message("") + message("=======================================") + message("") endif() endif(SLEEF_SHOW_ERROR_LOG) diff --git a/doc/html/CMakeLists.txt b/doc/html/CMakeLists.txt index 8aea5eaa..3b8a5dc2 100644 --- a/doc/html/CMakeLists.txt +++ b/doc/html/CMakeLists.txt @@ -4,11 +4,11 @@ find_package(Git REQUIRED) ExternalProject_Add(libsleef GIT_REPOSITORY https://github.com/shibatch/sleef - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${sleef_BINARY_DIR}/contrib + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib ) -include_directories(${sleef_BINARY_DIR}/contrib/include) -link_directories(${sleef_BINARY_DIR}/contrib/lib) +include_directories(${CMAKE_BINARY_DIR}/contrib/include) +link_directories(${CMAKE_BINARY_DIR}/contrib/lib) add_executable(hellox86 hellox86.c) add_dependencies(hellox86 libsleef) diff --git a/doc/html/aarch32.xhtml b/doc/html/aarch32.xhtml index e00d4d47..b8bf9754 100644 --- a/doc/html/aarch32.xhtml +++ b/doc/html/aarch32.xhtml @@ -10,10 +10,10 @@ -SLEEF Documentation +SLEEF - Math library reference (AArch32) -

SLEEF Documentation - Math library reference

+

SLEEF Documentation - Math library reference (AArch32)

Table of contents

@@ -38,6 +38,7 @@
  • Other functions
  • Data types and functions for PPC64 architecture
  • +
  • Data types and functions for System/390 architecture
  •  
  • @@ -1199,6 +1200,27 @@ This is the vectorized function of Sleef_fmodf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

    +
    + +

    Vectorized single precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float32x4_t Sleef_remainderf4(float32x4_t a, float32x4_t b);
    +float32x4_t Sleef_remainderf4_neon(float32x4_t a, float32x4_t b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainderf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant. +

    +

    Vectorized single precision function for obtaining fractional component of an FP number

    diff --git a/doc/html/aarch64.xhtml b/doc/html/aarch64.xhtml index 7aa14bca..9c22336e 100644 --- a/doc/html/aarch64.xhtml +++ b/doc/html/aarch64.xhtml @@ -10,10 +10,10 @@ -SLEEF Documentation +SLEEF - Math library reference (AArch64) -

    SLEEF Documentation - Math library reference

    +

    SLEEF Documentation - Math library reference (AArch64)

    Table of contents

    @@ -38,6 +38,7 @@
  • Data types and functions for AArch32 architecture
  • Data types and functions for PPC64 architecture
  • +
  • Data types and functions for System/390 architecture
  •  
  • @@ -3689,6 +3690,49 @@ This is the vectorized function of Sleef_fmodf with the same accuracy specification.

    +
    + +

    Vectorized double precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float64x2_t Sleef_remainderd2(float64x2_t a, float64x2_t b);
    +float64x2_t Sleef_remainderd2_advsimd(float64x2_t a, float64x2_t b);
    +svfloat64_t Sleef_remainderdx_sve(svfloat64_t a, svfloat64_t b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainder with the same accuracy specification. +

    + +
    +

    Vectorized single precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float32x4_t Sleef_remainderf4(float32x4_t a, float32x4_t b);
    +float32x4_t Sleef_remainderf4_advsimd(float32x4_t a, float32x4_t b);
    +svfloat32_t Sleef_remainderfx_sve(svfloat32_t a, svfloat32_t b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainderf with the same accuracy specification. +

    +

    Vectorized double precision function for multiplying by integral power of 2

    diff --git a/doc/html/additional.xhtml b/doc/html/additional.xhtml index 236e12e2..281b0692 100644 --- a/doc/html/additional.xhtml +++ b/doc/html/additional.xhtml @@ -10,7 +10,7 @@ -SLEEF Documentation +SLEEF - Additional Notes

    SLEEF Documentation - Additional Notes

    @@ -27,14 +27,54 @@
  •  
  • Additional notes
  • +

    Frequently asked questions

    + +

    + Q1: Are the scalar functions in SLEEF faster than the + corresponding functions in the standard C library? +

    + +
    + +

    + A1: No. Today's standard C libraries are very well optimized, + and there is little room for further optimization. The reason why + SLEEF is fast is that it carries out computation directly on SIMD + registers and ALUs. This is not as simple as it sounds, because + conditional branches have to be eliminated in order to take full + advantage of SIMD computation. If the algorithm requires conditional + branches according to the argument, it must prepare for the case + where the elements in the input vector contain both values that + would make a branch happen and not happen. This would spoil the + advantage of SIMD computation, because each element in a vector + would require a different code path. +

    + +
    +
    + +

    + Q2: Do the trigonometric functions (e.g. sin) in SLEEF return + correct values for the whole range of inputs? +

    + +
    + +

    + A2: Yes. SLEEF does implement a vectorized version of Payne + Hanek range reduction, and all the trigonometric functions return + a correct value with the specified accuracy. +

    +

    About the GNUABI version of the library

    @@ -43,25 +83,34 @@ compatible with libmvec in glibc, and the API comforms to the vector - ABI. This library is built and installed by default, and some - compilers may call the functions in this library. + href="https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt">x86 + vector ABI, AArch64 vector + ABI and Power + Vector ABI. This library is built and installed by default, and + certain compilers call the functions in this library.

    -

    How the dispatcher works

    +

    How the dispatchers work

    + The dispatchers in SLEEF are designed to have very low overhead. This + overhead is so small that it cannot be observed by microbenchmarking. +

    + +

    Fig. 7.1 shows a simplified code of our dispatcher. There is only one exported function mainFunc. When mainFunc is called for the first time, dispatcherMain is called internally, since funcPtr is initialized to the pointer to - dispatcherMain(line 14). It then detects if the - CPU supports SSE 4.1(line 7), and + dispatcherMain (line 14). It then detects if the + CPU supports SSE 4.1 (line 7), and rewrites funcPtr to a pointer to the function that utilizes SSE 4.1 or SSE 2, depending on the result of CPU - feature detection(line 10). When + feature detection (line 10). When mainFunc is called for the second time, it does not execute the dispatcherMain. It just executes the function @@ -71,8 +120,8 @@

    - There are a few advantages in our dispatcher. The first advantage is - that it does not require any compiler-specific extension. The second + There are advantages in our dispatcher. The first advantage is that + it does not require any compiler-specific extension. The second advantage is simplicity. There are only 18 lines of simple code. Since the dispatchers are completely separated for each function, there is not much room for bugs to get in. @@ -80,11 +129,12 @@

    The third advantage is low overhead. You might think that the - overhead is one function call including execution of prologue and - epilogue. However, since modern compilers eliminate redundant - execution of the prologue, epilogue and return instruction, the - actual overhead is just one jmp instruction. This is very fast since - it is not conditional. + overhead is one function call including execution of the prologue + and the epilogue. However, modern compilers are smart enough to + eliminate redundant execution of the prologue, epilogue and return + instruction. The actual overhead is just one jmp instruction, which + has very small overhead since it is not conditional. This overhead + is likely hidden by out-of-order execution.

    @@ -97,7 +147,8 @@ extensions. Once funcPtr is substituted with the pointer to funcSSE2 or funcSSE4, it will not be changed in the - future. It is obvious that the code works in all the cases. + future. It should be easy to confirm that the code works in all the + cases.

    @@ -130,93 +181,151 @@

    ULP stands for "unit in the last place", which is sometimes used for - measuring accuracy of calculations. 1 ULP is basically the distance - between the two closest floating point number, which depends on the - exponent of the FP number. The accuracy of calculations by reputable - math libraries is usually between 0.5 and 1 ULP. Here, the accuracy - means the largest error of calculation, which only happens in the - worst case. SLEEF math library provides multiple accuracy choices - for some math functions. Many functions have 3.5-ULP and 1-ULP - versions, and 3.5-ULP versions are significantly faster than 1-ULP - versions. If you care more about execution speed than accuracy, it - is advised to use the 3.5-ULP versions along with -ffast-math or - "unsafe math optimization" options for the compiler. + representing accuracy of calculation. 1 ULP is the distance between + the two closest floating point numbers, which depends on the exponent + of the FP number. The accuracy of calculation by reputable math + libraries is usually between 0.5 and 1 ULP. Here, the accuracy means + the largest error of calculation. SLEEF math library provides + multiple accuracy choices for most of the math functions. Many + functions have 3.5-ULP and 1-ULP versions, and 3.5-ULP versions are + faster than 1-ULP versions. If you care more about execution speed + than accuracy, it is advised to use the 3.5-ULP versions along with + -ffast-math or "unsafe math optimization" options for the compiler. +

    + +

    + Note that 3.5 ULPs of error is small enough in many applications. If + you do not manage the error of computation by carefully ordering + floating point operations in your code, you would easily have that + amount of error in the computation results.

    In IEEE 754 standard, underflow does not happen abruptly when the - exponent becomes zero. Instead, denormal numbers are produced which - has less precision, and this is sometimes called gradual - underflow. On some implementation which is not IEEE-754 conformant, - flush-to-zero mode is used since it is easier to implement. In - flush-to-zero mode, numbers smaller than the smallest normalized - number cannot be represented, and it is replaced with zero. Because - of this, the accuracy of calculation may be influenced in some - cases. The smallest normalized precision number can be referred with - DBL_MIN for double precision, and FLT_MIN for single precision. The - naming of these macros is a little bit confusing because DBL_MIN is - not the smallest double precision number. + exponent becomes zero. Instead, when a number to be represented is + smaller than a certain value, a denormal number is produced which + has less precision. This is sometimes called gradual underflow. On + some processor implementations, a flush-to-zero mode is used since it + is easier to implement in hardware. In flush-to-zero mode, numbers + smaller than the smallest normalized number are replaced with + zero. FP operations are not IEEE-754 conformant if a flush-to-zero + mode is used. A flush-to-zero mode influences the accuracy of + calculation in some cases. The smallest normalized precision number + can be referred to as DBL_MIN for double precision, and FLT_MIN for + single precision. The naming of these macros is a little bit + confusing because DBL_MIN is not the smallest double precision + number.

    You can see known maximum errors in math functions in glibc - on this page.

    -

    About sincospi

    + +

    Explanatory source code for our modified Payne Hanek reduction method

    - The sincospi series of functions evaluates sin( - πa ) and cos( - πa ) simultaneously. These functions are - added to SLEEF as of version 3.0. There are a few reasons that I - added these functions. + In order to evaluate a trigonometric function with a large argument, + an argument reduction method is used to find an FP remainder of + dividing the argument x by π. We devised a + variation of the Payne-Hanek argument reduction method which is + suitable for vector computation. Fig. 7.2 + shows an explanatory source + code for this method. See our paper for + the details.

    -

    - C standards include specifications for functions that evaluate - trigonometric functions. In order to do calculations for evaluating - these functions, reduction of an argument is required. This involves - a multiple precision multiplication with π, - which requires many operations of addition and multiplication. This - is slow especially if accurate evaluation is required. By designing - the function in a way that the argument is pre-multiplied - by π, this reduction can be eliminated. This - leads to faster and more accurate evaluation. +

    +#include <stdio.h>
    +#include <stdlib.h>
    +#include <math.h>
    +#include <mpfr.h>
    +
    +typedef struct { double x, y; } double2; // Pair of doubles; the helpers below use x as the leading part and y as a correction term
    +double2 dd(double d) { double2 r = { d, 0 }; return r; } // Wrap a double as a double2 with y = 0
    +int64_t d2i(double d) { union { double f; int64_t i; } tmp = {.f = d }; return tmp.i; } // Reinterpret the bits of a double as a 64-bit integer. NOTE(review): int64_t is declared in <stdint.h>, which is not among the includes shown - confirm
    +double i2d(int64_t i) { union { double f; int64_t i; } tmp = {.i = i }; return tmp.f; } // Reinterpret a 64-bit integer as the bits of a double
    +double upper(double d) { return i2d(d2i(d) & 0xfffffffff8000000LL); } // Clear the low 27 bits of d's bit pattern (not called elsewhere in this listing)
    +double clearlsb(double d) { return i2d(d2i(d) & 0xfffffffffffffffeLL); } // Clear the lowest bit of d's bit pattern
    +
    +double2 ddrenormalize(double2 t) { // Re-express t.x + t.y as a double2 whose x is the rounded sum
    +  double2 s = dd(t.x + t.y);
    +  s.y = t.x - s.x + t.y; // what is left of t.x + t.y after taking out s.x
    +  return s;
    +}
    +
    +double2 ddadd(double2 x, double2 y) { // Add two double2 values and return the result as a double2
    +  double2 r = dd(x.x + y.x); // rounded sum of the leading parts
    +  double v = r.x - x.x; // the part of r.x attributed to y.x
    +  r.y = (x.x - (r.x - v)) + (y.x - v) + (x.y + y.y); // rounding error of x.x + y.x, plus both correction terms
    +  return r;
    +}
    +
    +double2 ddmul(double x, double y) { // Multiply two doubles; return the product and its rounding error as a double2
    +  double2 r = dd(x * y);
    +  r.y = fma(x, y, -r.x); // x*y - r.x, evaluated by fma with a single rounding
    +  return r;
    +}
    +
    +double2 ddmul2(double2 x, double2 y) { // Multiply two double2 values and return the result as a double2
    +  double2 r = ddmul(x.x, y.x); // product of the leading parts together with its error term
    +  r.y += x.x * y.y + x.y * y.x; // add the cross terms; the x.y * y.y term is not included
    +  return r;
    +}
    +
    +// This function computes remainder(a, PI/2) and returns it as a double2
    +double2 modifiedPayneHanek(double a) {
    +  double table[4];
    +  int scale = fabs(a) > 1e+200 ? -128 : 0; // arguments above 1e+200 in magnitude are scaled down by 2^128
    +  a = ldexp(a, scale);
    +
    +  // Table generation
    +
    +  mpfr_set_default_prec(2048);
    +  mpfr_t pi, m;
    +  mpfr_inits(pi, m, NULL);
    +  mpfr_const_pi(pi, GMP_RNDN);
    +
    +  mpfr_d_div(m, 2, pi, GMP_RNDN); // m = 2/pi
    +  mpfr_set_exp(m, mpfr_get_exp(m) + (ilogb(a) - 53 - scale)); // raise the exponent so that mpfr_frac below removes the integer part
    +  mpfr_frac(m, m, GMP_RNDN); // keep only the fractional part
    +  mpfr_set_exp(m, mpfr_get_exp(m) - (ilogb(a) - 53)); // lower the exponent again; the -scale part stays in m, offsetting the scaling applied to a
    +
    +  for(int i=0;i<4;i++) { // split m into four doubles, each with its lowest bit cleared
    +    table[i] = clearlsb(mpfr_get_d(m, GMP_RNDN));
    +    mpfr_sub_d(m, m, table[i], GMP_RNDN); // remove the extracted piece from m
    +  }
    +
    +  mpfr_clears(pi, m, NULL); // release the MPFR variables
    +
    +  // Main computation
    +
    +  double2 x = dd(0);
    +  for(int i=0;i<4;i++) { // accumulate a * table[i] in double2 arithmetic
    +    x = ddadd(x, ddmul(a, table[i]));
    +    x.x = x.x - round(x.x); // drop the nearest integer from the leading part
    +    x = ddrenormalize(x);
    +  }
    +
    +  double2 pio2 = { 3.141592653589793*0.5, 1.2246467991473532e-16*0.5 }; // pi/2 as a double2 (leading part and correction term)
    +  x = ddmul2(x, pio2); // multiply the accumulated fraction by pi/2
    +  return fabs(a) < 0.785398163397448279 ? dd(a) : x; // arguments below about pi/4 in magnitude are returned unchanged
    +}
    +
    +

    + Fig. 7.2: Explanatory source code for our modified Payne Hanek reduction method

    -

    - The second reason is that sincospi functions are handy for - implementing an FFT library. FFT libraries need to evaluate - trigonometric functions for generating twiddle factors that is used in - the butterfly operations. Since the butterfly operations are - repeatedly applied, the error in twiddle factors accumulates. Thus, we - want to make the error in twiddle factors as small as possible. In an - FFT of power-of-two size, twiddle factors are - sin( πm / - 2n ) where m - and n are integer. If we just use the usual - trigonometric functions defined in the C standards with the - precision same as that used for butterfly operations, we already - have error when calculating arguments, since - πm / 2n cannot - be represented as a floating point value without error. On the - other hand, if we use sincospi function, the argument can be - accurately represented by a radix 2 FP number. Thus, we can - calculate twiddle factors with better accuracy. -

    -

    - The third reason is that sinpi is needed internally to implement - gamma functions. -

    - It is a soup ladle. Sleef means a soup ladle in Dutch. + It is a soup ladle. A sleef means a soup ladle in Dutch.


    diff --git a/doc/html/benchmark.xhtml b/doc/html/benchmark.xhtml index 91fb22d0..dd1d54be 100644 --- a/doc/html/benchmark.xhtml +++ b/doc/html/benchmark.xhtml @@ -9,7 +9,7 @@ -SLEEF Documentation - Benchmark Results +SLEEF - Benchmark Results

    SLEEF Documentation - Benchmark Results

    diff --git a/doc/html/compile.xhtml b/doc/html/compile.xhtml index 8715b0a6..bab4cfc6 100644 --- a/doc/html/compile.xhtml +++ b/doc/html/compile.xhtml @@ -9,7 +9,7 @@ -SLEEF Documentation +SLEEF - Compiling and installing the library

    SLEEF Documentation - Compiling and installing the library

    @@ -21,10 +21,11 @@
  •  
  • Compiling and installing the library
  • @@ -36,49 +37,41 @@
  • Additional notes
  • -

    About CMake

    +

    Preliminaries

    -CMake is an open-source and -cross-platform building tool for software packages that provides easy -managing of multiple build systems at a time. It works by allowing the -developer to specify build parameters and rules in a simple text file -that cmake then processes to generate project files for the actual -native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio, -Apple XCode, etc). That means you can easily maintain multiple -separate builds for one project and manage cross-platform hardware and -software complexity. +In order to build SLEEF, you need CMake, which is an open-source and +cross-platform building tool. In order to test the library, it is +better to have the +GNU MPFR Library, Libssl and +FFTW.

    -If you are not already familiar with cmake, please refer to the +CMake works by allowing the developer to specify build parameters and +rules in a simple text file that cmake then processes to generate +project files for the actual native build tools (e.g. UNIX Makefiles, +Microsoft Visual Studio, Apple XCode, etc). If you are not already +familiar with cmake, please refer to the official documentation or -the basic -introductions in the wiki (recommended). -

    - -

    -Before using CMake you will need to install/build the binaries on your -system. Most systems have cmake already installed or provided by the -standard package manager. If that is not the case for you, please -download and install now. -For building SLEEF, version 3.4.3 is the minimum required. +the basic +introductions in the wiki.

    Quick start

    1. Make sure cmake is available on the command-line. The command below -should display a version number greater than or equal to 3.4.3. +should display a version number greater than or equal to 3.5.1.

    $ cmake --version

    -2. Download the tar from -the software -repository or checkout out the source code from -the GitHub repository. +2. Check out the source code from our GitHub repository.

    $ git clone https://github.com/shibatch/sleef
    @@ -92,29 +85,11 @@ $ mkdir build && cd build

    4. Run cmake to configure your project and generate the system to build it:

    -
    $ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
    -	-DCMAKE_INSTALL_PREFIX=../my-sleef-install \
    -	..
    +
    $ cmake ..

    -This flag configures an optimised libsleef shared library build with -basic debug info. By default, cmake will autodetect your system -platform and configure the build using the default parameters. You can -control and modify these parameters by setting variables when running -cmake. See the list of options and variables for customizing -your build. -

    - -

    -NOTE: On Windows, you need to use a specific generator like this: -`cmake -G"Visual Studio 15 2017 Win64" ..` specifying the Visual -Studio version and targeting specifically `Win64` (to support -compilation of AVX/AVX2) Check `cmake -G` to get a full list of -supported Visual Studio project generators. This generator will -create a proper solution `SLEEF.sln` under the build directory. You -can still use `cmake --build .` to build without opening Visual -Studio. +See the list of options and +variables for customizing your build.

    @@ -124,82 +99,189 @@ of the build directory:

    $ make

    -6. Install the library under ../my-sleef/install by running: +6. You can execute the tests by running:

    -
    $ make install
    +
    $ make test

    -7. You can execute the tests by running: +7. Install the library under the directory given by CMAKE_INSTALL_PREFIX by running:

    -
    $ make test
    +
    $ make install
    -

    Build customization

    -

    -Variables dictate how the build is generated; options are defined and undefined, -respectively, on the cmake command line like this: -

    -
      -
    • cmake -DVARIABLE=<value> <cmake-build-dir>
    • -
    • cmake -UVARIABLE <cmake-build-dir>
    • -
    -

    -Build configurations allow a project to be built in different ways for debug, -optimized, or any other special set of flags. -

    +

    Common CMake variables

    -

    CMake Variables

    +

    +Below is the list of common cmake variables that are used to +configure a build for SLEEF. +

      -
    • `CMAKE_BUILD_TYPE`: By default, CMake supports the following configuration:
    • +
    • CMAKE_BUILD_TYPE: By default, CMake supports the following configuration:
      • `Release`: Basic optimizations are turned on. This is the default setting.
      • `Debug`: Basic debug flags are turned on. Optimization is disabled.
      • `MinSizeRel`: Builds the smallest (but not fastest) object code
      • `RelWithDebInfo`: Builds optimized code with debug information as well
      -
    • `CMAKE_INSTALL_PREFIX` : The prefix it - uses when running `make install`. Defaults to /usr/local on - GNU/Linux and MacOS. Defaults to C:/Program Files on Windows.
    • -
    • `CMAKE_C_FLAGS_RELEASE` : The - optimization options used by the compiler.
    • -
    • `BUILD_TESTS` : Avoid building testing +
    • BUILD_SHARED_LIBS : Static libs are built if set to + FALSE
    • +
    • CMAKE_C_FLAGS_RELEASE : The optimization options used by + the compiler.
    • +
    • CMAKE_INSTALL_PREFIX : The prefix it uses when running + `make install`. Defaults to /usr/local on GNU/Linux and + MacOS. Defaults to C:/Program Files on Windows.
    • +
    + +

    SLEEF-specific CMake variables

    + +

    +Below is the list of SLEEF-specific cmake variables. +

    + +
      + +
    • SLEEF_SHOW_CONFIG : Show relevant + cmake variables upon configuring a build
    • +
    • SLEEF_SHOW_ERROR_LOG : Show the content of + CMakeError.log
    • +
    • BUILD_TESTS : Avoid building testing tools if set to FALSE
    • -
    • `BUILD_GNUABI_LIBS` : Avoid building +
    • ENABLE_ALTDIV : Enable alternative division method (aarch64 only)
    • +
    • ENABLE_ALTSQRT : Enable alternative sqrt method (aarch64 only)
    • +
    • DISABLE_LONG_DOUBLE : Disable support for long double data type
    • +
    • ENFORCE_LONG_DOUBLE : Build fails if long double data type is + not supported by the compiler
    • +
    • DISABLE_FLOAT128 : Disable support for float128 data type
    • +
    • ENFORCE_FLOAT128 : Build fails if float128 data type is not + supported by the compiler
    • +
    • DISABLE_OPENMP : Disable support for OpenMP
    • +
    • ENFORCE_OPENMP : Build fails if OpenMP is not + supported by the compiler
    • + + +
    • ENABLE_LTO : Enable support for LTO with gcc, or thinLTO + with llvm
    • +
    • LLVM_AR_COMMAND : Specify LLVM AR command when you build + the library with thinLTO support with clang.
    • +
    • SLEEF_ENABLE_LLVM_BITCODE : Generate LLVM bitcode
    • +
    • BUILD_INLINE_HEADERS : Generate header files for inlining + whole SLEEF functions
    • + + +
    • DISABLE_SSE2 : Disable support for x86 SSE2
    • +
    • ENFORCE_SSE2 : Build fails if SSE2 is not + supported by the compiler
    • +
    • DISABLE_SSE4 : Disable support for x86 SSE4
    • +
    • ENFORCE_SSE4 : Build fails if SSE4 is not + supported by the compiler
    • +
    • DISABLE_AVX : Disable support for x86 AVX
    • +
    • ENFORCE_AVX : Build fails if AVX is not + supported by the compiler
    • +
    • DISABLE_FMA4 : Disable support for x86 FMA4
    • +
    • ENFORCE_FMA4 : Build fails if FMA4 is not + supported by the compiler
    • +
    • DISABLE_AVX2 : Disable support for x86 AVX2
    • +
    • ENFORCE_AVX2 : Build fails if AVX2 is not + supported by the compiler
    • +
    • DISABLE_AVX512F : Disable support for x86 AVX512F
    • +
    • ENFORCE_AVX512F : Build fails if AVX512F is not + supported by the compiler
    • +
    • DISABLE_SVE : Disable support for AArch64 SVE
    • +
    • ENFORCE_SVE : Build fails if SVE is not + supported by the compiler
    • +
    • DISABLE_VSX : Disable support for PowerPC VSX
    • +
    • ENFORCE_VSX : Build fails if VSX is not + supported by the compiler
    • +
    • DISABLE_ZVECTOR2 : Disable support for ZVECTOR2
    • +
    • ENFORCE_ZVECTOR2 : Build fails if ZVECTOR2 is not + supported by the compiler
    • + + +
    • BUILD_GNUABI_LIBS : Avoid building libraries with GNU ABI if set to FALSE
    • -
    • `BUILD_DFT` : Avoid building DFT +
    • ENFORCE_TESTER3 : Build fails if + tester3 cannot be built
    • + + +
    • BUILD_DFT : Avoid building DFT libraries if set to FALSE
    • -
    • `BUILD_SHARED_LIBS` : Static libs are - built if set to FALSE
    • -
    • `SLEEFDFT_MAXBUTWIDTH` : This variable +
    • SLEEFDFT_MAXBUTWIDTH : This variable specifies the maximum length of combined butterfly block used in the DFT. Setting this value to 7 makes DFT faster but compilation takes more time and the library size will be larger.
    • +
    • DISABLE_FFTW : Disable FFTW-based testing of the DFT + library.
    • + + +
    • BUILD_QUAD : An experimental quad-precision + library will be built if set to TRUE
    -

    Compiling and installing library on Linux

    +

    Compiling and installing the library on Linux

    - In order to build the library, you need to install libmpfr and - OpenMP(libmpfr is only required to build the tester, and it is not - linked to the library). Please change the directory to sleef-3.X and - run the following commands. - -

    $ mkdir build
    +  In order to build the library, you need to install OpenMP. In order
    +  to test the library, you need to install libmpfr, libssl and
    +  libfftw3. Availability of these libraries is checked upon execution
    +  of cmake. Please change the directory to sleef-3.X and run the
    +  following commands.
    +
    +  
    $ sudo apt-get install libmpfr-dev libssl-dev libfftw3-dev
    +$ mkdir build
     $ cd build
     $ cmake -DCMAKE_INSTALL_PREFIX=/usr ..
     $ make
     $ make test
     $ sudo make install

    - -

    + +

    + Parallel build is only supported with Ninja. +

    + +
    + +

    In order to uninstall the libraries and headers, run the following command. +

    + +
    $ sudo xargs rm -v < install_manifest.txt
    + +

    Building the library with LTO support

    + +

    + You can build the library with link time optimization(LTO) support + with the following commands. Note that you can only build static + libraries with LTO support. You also have to use the same compiler + with the same version to build the library and other source codes. +

    + +
    $ CC=gcc cmake -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE ..
    + +

    + In order to build the library with thinLTO support with clang, you + need to specify LLVM AR command that exactly corresponds to the + clang compiler. +

    + +
    $ CC=clang-9 cmake -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-9 ..
    -
    $ sudo xargs rm -v < install_manifest.txt
    + +

    Building the header files for inlining the whole SLEEF functions

    + +

    + Header files for inlining the whole SLEEF functions can be built + with the following commands. With these header files, it may be + easier to inline the whole SLEEF functions than using LTO. You + cannot include more than one of these header files from the same + file. You have to specify "-ffp-contract=off" compiler option when + compiling a source code that includes one of these header files.

    -

    Installing library on Debian and Ubuntu

    +
    $ cmake -DBUILD_INLINE_HEADERS=TRUE ..
    + +

    Installing the library on Debian and Ubuntu

    If you are using Debian 10(Buster), Ubuntu 18.04(Bionic) or later, @@ -210,21 +292,45 @@ $ sudo make install

    $ sudo apt-get install libsleef-dev

    -

    Compiling library with Microsoft Visual C++

    +

    Compiling the library with Microsoft Visual C++

    - You need Visual Studio 2017. Open developer command prompt for - VS2017, change directory to sleef-3.X, and then run the following - commands. - -

    D:\sleef-3.X> mkdir build
    +  You need Visual Studio 2019. Open developer command prompt for
    +  VS2019 and change directory to sleef-3.X. When configuring a build
    +  with cmake, you need to use a specific generator: `cmake -G"Visual
    +  Studio 16 2019" ..` This generator will create a proper solution
    +  `SLEEF.sln` under the build directory. You can still use `cmake
    +  --build .` to build the library without opening Visual Studio.
    +

    + +

    + Below is an example of commands for building SLEEF with Visual + Studio. +

    + +
    D:\sleef-3.X> mkdir build
     D:\sleef-3.X> cd build
    -D:\sleef-3.X\build> cmake -G"Visual Studio 15 2017 Win64" ..
    +D:\sleef-3.X\build> cmake -G"Visual Studio 15 2017 Win64" ..    &:: If you are using VS2017
    +D:\sleef-3.X\build> cmake -G"Visual Studio 16 2019" ..          &:: If you are using VS2019
     D:\sleef-3.X\build> cmake --build . --config Release -- /maxcpucount:1
    - Note that parallel build is not supported on MSVC. + +

    Compiling the library with Clang on Windows

    + +

    + You need Visual Studio 2019. Install ninja via VS2019 installer. + Download and install clang on Windows from llvm.org. Below + is an example of commands for building SLEEF with Clang on Windows.

    +
    D:\sleef-3.X> "c:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
    +D:\sleef-3.X> mkdir build
    +D:\sleef-3.X> cd build
    +D:\sleef-3.X\build> cmake -GNinja -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" ..
    +D:\sleef-3.X\build> ninja
    + +

    Compiling and running "Hello SLEEF"

    @@ -296,17 +402,17 @@ $ █

    -cmake_minimum_required(VERSION 3.4.3)
    +cmake_minimum_required(VERSION 3.5.1)
     include(ExternalProject)
     find_package(Git REQUIRED)
     
     ExternalProject_Add(libsleef
       GIT_REPOSITORY https://github.com/shibatch/sleef
    -  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${sleef_BINARY_DIR}/contrib
    +  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib
     )
     
    -include_directories(${sleef_BINARY_DIR}/contrib/include)
    -link_directories(${sleef_BINARY_DIR}/contrib/lib)
    +include_directories(${CMAKE_BINARY_DIR}/contrib/include)
    +link_directories(${CMAKE_BINARY_DIR}/contrib/lib)
     
     add_executable(hellox86 hellox86.c)
     add_dependencies(hellox86 libsleef)
    diff --git a/doc/html/dft.xhtml b/doc/html/dft.xhtml
    index 47d0561b..da00b61d 100644
    --- a/doc/html/dft.xhtml
    +++ b/doc/html/dft.xhtml
    @@ -10,7 +10,7 @@
     
     
     
    -SLEEF Documentation
    +SLEEF - DFT library reference
     
     
     

    SLEEF Documentation - DFT library reference

    diff --git a/doc/html/index.xhtml b/doc/html/index.xhtml index 74a3ddc7..b56da4eb 100644 --- a/doc/html/index.xhtml +++ b/doc/html/index.xhtml @@ -21,6 +21,7 @@
  • GitHub repository
  • Mailing list
  • Discussion forum
  • +
  • Wiki pages
  • Table of contents

    @@ -30,8 +31,6 @@
    • Overview
    • Supported environments
    • - -
    • Credit
    • Partners
    • License
    • @@ -54,46 +53,50 @@

      SLEEF stands for SIMD Library for Evaluating Elementary Functions. It implements vectorized versions of all C99 real - floating point math functions. It can utilize SIMD instructions of - modern processors. SLEEF is designed to fully utilize SIMD - computation by reducing the use of conditional branches and - scatter/gather memory access. Our benchmarks show that the performance of SLEEF is comparable to that of the best commercial library. Unlike - vendor-tuned libraries, SLEEF is portable : it can be easily ported - to other architectures by writing a helper file, which is a thin - abstraction layer of SIMD intrinsics. SLEEF is also designed to work - with various operating systems and compilers. + vendor-tuned assembly-optimized libraries, SLEEF can be easily + ported to other architectures by writing a helper file, which is a + thin abstraction layer of SIMD intrinsics. SLEEF is also designed to + work with various operating systems and compilers. Link time + optimization can be used to reduce the overhead of calling + functions.

      - The library contains subroutines for all C99 real FP math functions - in double precision and single precision. Different accuracy of the - results can be chosen for a subset of the elementary functions; for - this subset there are versions with up to 1 ulp error (which is the - maximum error, not the average) and versions with a few ulp - error. Obviously, less accurate versions are faster. For non-finite - inputs and outputs, the library should return the same results as - libm as specified in the C99 standard. The library is rigorously - tested if the evaluation error is within the designed limit. The - library is tested against high-precision evaluation - using the libmpfr - library. Especially, we carefully checked the error of the - trigonometric functions when the arguments are close to an integral - multiple of π/2. + The library contains implementations of all C99 real FP math + functions in double precision and single precision. Different + accuracy of the results can be chosen for a subset of the elementary + functions; for this subset there are versions with up to 1 ULP error + (which is the maximum error, not the average) and even faster + versions with a few ULPs of error. For non-finite inputs and + outputs, the functions return correct results as specified in the + C99 standard. All the functions in the library are thoroughly tested + and confirmed that the evaluation error is within the designed + limit, by comparing the returned values against high-precision + evaluation using the GNU MPFR Library. Especially, we + carefully checked the error of the trigonometric functions with + arguments close to an integral multiple + of π/2.

      SLEEF also includes subroutines for discrete Fourier transform(DFT). These subroutines are fully vectorized, heavily - unrolled, and parallelized so that modern SIMD instructions and - multiple cores can be utilized for efficient computation. It has - an API similar to that of FFTW for easy migration, and distributed under + unrolled, and parallelized in such a way that modern SIMD + instructions and multiple cores can be utilized for efficient + computation. It has an API similar to that of FFTW for easy + migration, and distributed under BSL, which is a permissive open source license. The subroutines can utilize long - vectors up to 2048 bits, and even longer vectors can be utilized by - a small modification. The helper files for abstracting SIMD + vectors up to 2048 bits. The helper files for abstracting SIMD intrinsics are shared with SLEEF libm, and thus it is easy to port DFT subroutines to other architectures. Preliminary @@ -109,20 +112,21 @@



      In addition to the SIMD implementation, Pure C (scalar) version is - provided. For x86 architecture, the library provides dispatchers - that automatically choose the best subroutines for the computer the - library is run. The supported combinations of the architecture, - operating system and compiler are shown in Table 1.1. + provided. The library provides dispatchers that automatically choose + the best subroutines for the computer the library is executed + on. The supported combinations of the architecture, operating system + and compiler are shown in Table 1.1.

      @@ -159,19 +163,11 @@ - x86 (64bit), Linux + x86_64, Linux Supported Supported - Supported(*1) - N/A - - - x86 (32bit), Linux - - Supported(*2) - Supported(*2) - + Supported N/A @@ -185,40 +181,48 @@ AArch32, Linux - Supported(*3) - Supported(*3) + Supported(*1) + Supported(*1) N/A N/A PowerPC, Linux - - Supported(*4) + Supported + Supported N/A N/A - x86 (64bit), FreeBSD + System/390, Linux - + Supported Supported - + N/A N/A - x86 (64bit), OS X + x86_64, FreeBSD - Supported + Supported + N/A + N/A + + + x86_64, OS X + + Supported(*2) + Supported(*2) N/A - x86 (64bit), Windows + x86_64, Windows - Supported(Cygwin)(*5) - Supported(Cygwin)(*5) + Supported(Cygwin)(*3) + Supported(*3) Supported @@ -237,33 +241,24 @@
      • GCC : version 5 and later
      • -
      • Clang : version 3.9 and later
      • +
      • Clang : version 6 and later
      • Intel Compiler : ICC version 17
      • -
      • MSVC : Visual Studio 2017
      • +
      • MSVC : Visual Studio 2019

      - *1 FMA4 is not supported by Intel Compiler. -

      - -

      - *2 SSE2 is required to run the scalar functions on 32-bit x86 - architecture. x87 is not supported. -

      - -

      - *3 NEON has only single precision support. The computation results - are not in full accuracy since NEON is not IEEE 754-compliant. + *1 NEON has only single precision support. The computation results + are not in full accuracy because NEON is not IEEE 754-compliant.

      - *4 Clang-5.0 and later are supported. + *2 LTO is not supported.

      - *5 AVX functions are not supported for Cygwin, since AVX is not + *3 AVX functions are not supported for Cygwin, because AVX is not supported by Cygwin ABI.

      @@ -282,7 +277,7 @@ ( shibatch@users.sourceforge.net ) at Nara Institute of Science and Technology.
    • Francesco Petrogalli + href="https://github.com/fpetrogalli">Francesco Petrogalli at ARM Ltd. contributed the helper for AArch64 (helperadvsimd.h, helpersve.h) and GNUABI interface of the library. He also worked on migrating the build system to cmake, and reviewed the code, gave @@ -317,8 +312,7 @@ height="135px" alt="NAIST logo"/>

      -The Mobile -Computing Lab at Division of Information Science of Nara Institute +Division of Information Science of Nara Institute of Science and Technology participates through Naoki Shibata.

      @@ -375,6 +369,25 @@ information about Boost Software License.

      History

      +

      3.5 (Released on Sep 1, 2020)

      +
        +
      • IBM System/390 support is added (PR #291)
      • +
      • The library can be built with Clang on Windows (PR #300)
      • +
      • Static libraries with LTO can be generated (PR + #290)
      • +
      • Alternative division and sqrt methods can be chosen with AArch64 (PR + #289)
      • +
      • Header files for inlining the whole SLEEF functions can be +generated (PR #283)
      • +
      • IEEE remainder function is added (PR + #271)
      • +
      • GCC-10 can now build SLEEF with SVE support (PR + #310)
      • +
      +

      3.4.1 (Released on Oct 1, 2019)

      • Fixed accuracy problem with tan_u35, atan_u10, log2f_u35 and exp10f_u10 (PR @@ -457,57 +470,22 @@ information about Boost Software License. FMA4 implementation.
      -

      3.1 (Released on July 19, 2017)

      -
        -
      • Added AArch64 support
      • -
      • Implemented the remaining C99 math functions : lgamma, tgamma, - erf, erfc, fabs, copysign, fmax, fmin, fdim, trunc, floor, ceil, - round, rint, modf, ldexp, nextafter, frexp, hypot, and fmod.
      • -
      • Added dispatcher for x86 functions
      • -
      • Improved reduction of trigonometric functions
      • -
      • Added support for 32-bit x86, Cygwin, etc.
      • -
      • Improved tester
      • -
      • Etc.
      • -
      - -

      3.0 (Released on Feb. 7, 2017)

      -
        -
      • New API is defined
      • -
      • Functions for DFT are added
      • -
      • sincospi functions are added
      • -
      • gencoef now supports single, extended and quad precision in addition to double precision
      • -
      • Linux, Windows and Mac OS X are supported
      • -
      • GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported
      • -
      • The library can be compiled as DLLs
      • -
      • Files needed for creating a debian package are now included
      • -
      - -

      2.120 (Released on Jan. 30, 2017)

      -
        -
      • Relicensed to Boost Software License Version 1.0
      • -
      +

      +See Changelog +for older changes.

      -

      2.110 (Released on Dec. 11, 2016)

      -
        -
      • The valid range of argument is extended for trig functions
      • -
      • Specification of each functions regarding to the domain and accuracy is added
      • -
      • A coefficient generation tool is added
      • -
      • New testing tools are introduced
      • -
      • Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh
      • -
      • SIMD xsin and xcos returned values more than 1 when FMA is enabled
      • -
      • Pure C cbrt returned incorrect values when the argument is negative
      • -
      • tan_u1 returned values with more than 1 ulp of error on rare occasions
      • -
      • Removed support for Java language(because no one seems using this)
      • -
      +
    • Agenium Scale NSIMD
    • +
    • J Language
    • +
    • SIMD Everywhere
    • +
    • Minocore
    • +
    • OctaSine
    • +
    • Simdeez
    • +

    Publication

      +
    • Naoki Shibata and Francesco Petrogalli : SLEEF: A Portable Vectorized Library of C Standard Mathematical Functions, in IEEE Transactions on Parallel and Distributed Systems, DOI:10.1109/TPDS.2019.2960333 (Dec. 2019). [PDF]
    • Francesco Petrogalli and Paul Walker : LLVM and the automatic vectorization of loops invoking math routines: -fsimdmath, 2018 IEEE/ACM 5th Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC), pp. 30-38., DOI:10.1109/LLVM-HPC.2018.8639354 (Nov. 2018). [PDF]
    diff --git a/doc/html/misc.xhtml b/doc/html/misc.xhtml index d2441a2c..0867a972 100644 --- a/doc/html/misc.xhtml +++ b/doc/html/misc.xhtml @@ -9,7 +9,7 @@ -SLEEF Documentation +SLEEF - Other tools included in the package

    SLEEF Documentation - Other tools included in the package

    @@ -37,27 +37,36 @@

    Libm tester

    - SLEEF libm has two kinds of testers, and each kind of testers has + SLEEF libm has three kinds of testers, and each kind of testers has its own role.

    - The first kind of testers consist of a tester and an IUT (which - stands for Implementation Under Test.) Those two are built as - separate executables, and communicate with each other using a - pipe. The role for this tester is to perform a perfunctory set of - tests to check if the build is correct. It is also performs - regression tests. Since the tester executable and the iut executable - are separated, the iut can be implemented with an exotic - languages. It is also possible to perform a test over the network. + The first kind of testers consists of a tester and an IUT (which + stands for Implementation Under Test.) The role for this tester is + to perform a perfunctory set of tests to check if the build is + correct. It also performs regression tests. The tester and IUT are + built as separate executables, and communicate with each other using + a pipe. Since these two are separate, the IUT can be implemented + with an exotic language or on an operating system that does not + support libraries required for testing. It is also possible to + perform a test over the network.

    The second kind of testers are designed to run continuously. It repeats randomly generating arguments for each function, and comparing the results of each function to the results calculated - with the corresponding function in libmpfr. This tester is expected - to find bugs if it is run for sufficiently long time. + with the corresponding function in the MPFR library. This tester is + expected to find bugs if it is run for sufficiently long time. +

    + +

    + The third kind of testers are for testing if bit-identical results + are returned from the functions that are supposed to return such + results. The MD5 hash value of all returned values from each + function is calculated and checked if it matches the precomputed + value.

    @@ -65,18 +74,16 @@

    SLEEF DFT has three kinds of testers. The first ones, named - naivetest, compare the results computed by SLEEF DFT with that from - a naive DFT implementation. These testers cannot be built with MSVC + naivetest, compare the results computed by SLEEF DFT with those by a + naive DFT implementation. These testers cannot be built with MSVC since complex data types are not supported. The second testers, named fftwtest, compare the results of computation between SLEEF DFT - and FFTW. Rigorous testing is possible with the second testers, but - obviously it requires FFTW to run. The third testers, named + and FFTW. This test requires the FFTW library. The third testers, named roundtriptest, executes a forward transform followed by a backward - transform. Then, it compares the results with the original data. An - advantage of the third testers is that it does not require external - library and it runs on all environment, but there could be many - cases that this testing does not find flaw. The third testers are - used only if FFTW is not available. + transform. Then, it compares the results with the original data. + While this test does not require an external library and it runs on all + environments, there could be cases where this test does not find some + flaw. The roundtrip testers are used only if FFTW is not available.

    @@ -90,10 +97,10 @@

    In order to change the configurations, please edit gencoefdp.c. In the beginning of the file, specifications of the parameters for - generating coefficients are listed. Enable one of them by changing - #if. Then, run make to compile the source code. Run the gencoef, and - it will show the generated coefficients in a few minutes. It may - take longer time depending on the settings. + generating coefficients are listed. Please enable one of them by + changing #if. Then, run make to compile the source code. Run the + gencoef, and it will show the generated coefficients in a few + minutes. It may take longer time depending on the settings.

    @@ -101,21 +108,21 @@ regression for minimizing the maximum relative error. This problem can be reduced to a linear programming problem, and the Simplex method is used in this implementation. This requires multi-precision - calculation, and the implementation uses the MPFR library to do - this. In this phase, it uses only a small number of values - (specified by macro S, usually less than 100) within the input - domain of the kernel function to approximate the function. The - function to approximate is given by FRFUNC function. Specifying - higher values for S does not always give better results. + calculation, and the implementation uses the MPFR library. In this + phase, it uses only a small number of values (specified by the macro + S, usually less than 100) within the input domain of the kernel + function to approximate the function. The function to approximate is + given by FRFUNC function. Specifying higher values for S does not + always give better results.

    The second phase is to optimize the coefficients so that it gives good accuracy with double precision calculation. In this phase, it - checks 10000 points (specified by macro Q) within the specified + checks 10000 points (specified by the macro Q) within the specified argument range to see if the polynomial gives good error bounds. In some cases, the last few terms have to be calculated in higher - precision in order to achieve 1 ULP or less overall accuracy, and + precision in order to achieve 1 ULP or better overall accuracy, and this implementation can take care of that. The L parameter specifies the number of high precision coefficients.

    @@ -150,7 +157,8 @@

    Then, change directory to sleef-3.X/src/libm-benchmarks/. You also - need to set the build directory to BUILDDIR environment variable. + need to set the build directory to BUILDDIR environment + variable. You also need Java runtime environment.

    $ export BUILDDIR=$PATH:`pwd`/../../build
    diff --git a/doc/html/ph.c b/doc/html/ph.c new file mode 100644 index 00000000..0c6496cf --- /dev/null +++ b/doc/html/ph.c @@ -0,0 +1,115 @@ +// Explanatory source code for the modified Payne Hanek reduction +// http://dx.doi.org/10.1109/TPDS.2019.2960333 + +#include +#include +#include +#include + +typedef struct { double x, y; } double2; +double2 dd(double d) { double2 r = { d, 0 }; return r; } +int64_t d2i(double d) { union { double f; int64_t i; } tmp = {.f = d }; return tmp.i; } +double i2d(int64_t i) { union { double f; int64_t i; } tmp = {.i = i }; return tmp.f; } +double upper(double d) { return i2d(d2i(d) & 0xfffffffff8000000LL); } +double clearlsb(double d) { return i2d(d2i(d) & 0xfffffffffffffffeLL); } + +double2 ddrenormalize(double2 t) { + double2 s = dd(t.x + t.y); + s.y = t.x - s.x + t.y; + return s; +} + +double2 ddadd(double2 x, double2 y) { + double2 r = dd(x.x + y.x); + double v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y.x - v) + (x.y + y.y); + return r; +} + +double2 ddmul(double x, double y) { + double2 r = dd(x * y); + r.y = fma(x, y, -r.x); + return r; +} + +double2 ddmul2(double2 x, double2 y) { + double2 r = ddmul(x.x, y.x); + r.y += x.x * y.y + x.y * y.x; + return r; +} + +// This function computes remainder(a, PI/2) +double2 modifiedPayneHanek(double a) { + double table[4]; + int scale = fabs(a) > 1e+200 ? 
-128 : 0; + a = ldexp(a, scale); + + // Table genration + + mpfr_set_default_prec(2048); + mpfr_t pi, m; + mpfr_inits(pi, m, NULL); + mpfr_const_pi(pi, GMP_RNDN); + + mpfr_d_div(m, 2, pi, GMP_RNDN); + mpfr_set_exp(m, mpfr_get_exp(m) + (ilogb(a) - 53 - scale)); + mpfr_frac(m, m, GMP_RNDN); + mpfr_set_exp(m, mpfr_get_exp(m) - (ilogb(a) - 53)); + + for(int i=0;i<4;i++) { + table[i] = clearlsb(mpfr_get_d(m, GMP_RNDN)); + mpfr_sub_d(m, m, table[i], GMP_RNDN); + } + + mpfr_clears(pi, m, NULL); + + // Main computation + + double2 x = dd(0); + for(int i=0;i<4;i++) { + x = ddadd(x, ddmul(a, table[i])); + x.x = x.x - round(x.x); + x = ddrenormalize(x); + } + + double2 pio2 = { 3.141592653589793*0.5, 1.2246467991473532e-16*0.5 }; + x = ddmul2(x, pio2); + return fabs(a) < 0.785398163397448279 ? dd(a) : x; +} + +int main(int argc, char **argv) { + double a = ldexp(6381956970095103.0, 797); + if (argc > 1) a = atof(argv[1]); + printf("a = %.20g\n", a); + + // + + mpfr_set_default_prec(2048); + mpfr_t pi, pio2, x, y, r; + mpfr_inits(pi, pio2, x, y, r, NULL); + + mpfr_const_pi(pi, GMP_RNDN); + mpfr_mul_d(pio2, pi, 0.5, GMP_RNDN); + + // + + mpfr_set_d(x, a, GMP_RNDN); + mpfr_remainder(r, x, pio2, GMP_RNDN); + + mpfr_printf("mpfr = %.64RNf\n", r); + + // + + double2 dd = modifiedPayneHanek(a); + mpfr_set_d(x, dd.x, GMP_RNDN); + mpfr_add_d(x, x, dd.y, GMP_RNDN); + + mpfr_printf("dd = %.64RNf\n", x); + + mpfr_sub(x, x, r, GMP_RNDN); + mpfr_abs(x, x, GMP_RNDN); + mpfr_div(x, x, r, GMP_RNDN); + + double err = mpfr_get_d(x, GMP_RNDN); + printf("error = %g\n", err); +} diff --git a/doc/html/ppc64.xhtml b/doc/html/ppc64.xhtml index 79a90ff7..2b634a69 100644 --- a/doc/html/ppc64.xhtml +++ b/doc/html/ppc64.xhtml @@ -10,10 +10,10 @@ -SLEEF Documentation +SLEEF - Math library reference (POWER) -

    SLEEF Documentation - Math library reference

    +

    SLEEF Documentation - Math library reference (POWER)

    Table of contents

    @@ -38,6 +38,7 @@
  • Nearest integer functions
  • Other functions
  • +
  • Data types and functions for System/390 architecture
  •  
  • @@ -3152,6 +3153,47 @@ This is the vectorized function of Sleef_fmodf with the same accuracy specification.

    +
    + +

    Vectorized double precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +vector double Sleef_remainderd2(vector double a, vector double b);
    +vector double Sleef_remainderd2_vsx(vector double a, vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainder with the same accuracy specification. +

    + +
    +

    Vectorized single precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +vector float Sleef_remainderf4(vector float a, vector float b);
    +vector float Sleef_remainderf4_vsx(vector float a, vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainderf with the same accuracy specification. +

    +

    Vectorized double precision function for multiplying by integral power of 2

    diff --git a/doc/html/purec.xhtml b/doc/html/purec.xhtml index 8b8776bb..85b92dbf 100644 --- a/doc/html/purec.xhtml +++ b/doc/html/purec.xhtml @@ -10,7 +10,7 @@ -SLEEF Documentation +SLEEF - Math library reference (scalar)

    SLEEF Documentation - Math library reference

    @@ -34,10 +34,11 @@
  • Nearest integer functions
  • Other functions
  • -
  • Data types and functions for x86 architectures
  • -
  • Data types and functions for AArch64 architectures
  • -
  • Data types and functions for AArch32 architectures
  • +
  • Data types and functions for x86 architecture
  • +
  • Data types and functions for AArch64 architecture
  • +
  • Data types and functions for AArch32 architecture
  • Data types and functions for PPC64 architecture
  • +
  • Data types and functions for System/390 architecture
  •  
  • @@ -118,7 +119,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the sine function of a value + These functions evaluate the sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -144,7 +145,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the sine function of a value + These functions evaluate the sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -170,7 +171,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the cosine function of a value + These functions evaluate the cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -196,7 +197,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the cosine function of a value + These functions evaluate the cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -337,7 +338,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the sine functions of + These functions evaluate the sine functions of πa . The error bound of the returned value are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for double-precision function, or max(0.506 ULP, FLT_MIN) if @@ -367,7 +368,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the cosine functions of + These functions evaluate the cosine functions of πa . The error bound of the returned value are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for double-precision function, or max(0.506 ULP, FLT_MIN) if @@ -398,7 +399,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the tangent function of a value + These functions evaluate the tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -424,7 +425,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the tangent function of a value + These functions evaluate the tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions @@ -858,7 +859,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc sine function of a value + These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -884,7 +885,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc sine function of a value + These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -910,7 +911,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc cosine function of a value + These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -936,7 +937,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc cosine function of a value + These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -962,7 +963,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc tangent function of a value + These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -988,7 +989,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc tangent function of a value + These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno @@ -1014,7 +1015,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc tangent function + These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bounds of the returned values @@ -1043,7 +1044,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the arc tangent function + These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bound of the returned value is 3.5 ULP. These functions treat the non-number @@ -1071,7 +1072,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic sine function of a value + These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function . If a is a @@ -1101,7 +1102,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic sine function of a value + These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function . If a is a @@ -1131,7 +1132,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic cosine function of a value + These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function . If a is a @@ -1161,7 +1162,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic cosine function of a value + These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function . If a is a @@ -1191,7 +1192,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic tangent function of a value + These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return @@ -1218,7 +1219,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the hyperbolic tangent function of a value + These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 3.5 ULP for the double-precision function or 3.5 ULP for the single-precision function. These functions treat the non-number arguments and return @@ -1245,7 +1246,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the inverse hyperbolic sine function of a + These functions evaluate the inverse hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for @@ -1276,7 +1277,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the inverse hyperbolic cosine function of + These functions evaluate the inverse hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, @@ -1307,7 +1308,7 @@ extended-precision (80-bit) floating point values, which is defined in

    Description

    - These functions evaluates the inverse hyperbolic tangent function of + These functions evaluate the inverse hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number @@ -1588,6 +1589,31 @@ max(0.50001 ULP, FLT_MIN), respectively.


    +

    Sleef_remainder, Sleef_remainderf - FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_remainder(double x, double y);
    +float Sleef_remainderf(float x, float y);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    + These functions return the value as specified in the C99 + specification of remainder and remainderf functions, if |x / y| is + smaller than 1e+300 and 1e+38, respectively. The returned value is + undefined, otherwise. These functions do not set errno nor raise an + exception. +

    + +
    +

    Sleef_ldexp, Sleef_ldexpf - multiply by integral power of 2

    Synopsis

    diff --git a/doc/html/s390x.xhtml b/doc/html/s390x.xhtml new file mode 100644 index 00000000..76b4fb62 --- /dev/null +++ b/doc/html/s390x.xhtml @@ -0,0 +1,3586 @@ + + + + + + + + + + + + +SLEEF - Math library reference (S390X) + + +

    SLEEF Documentation - Math library reference (S390X)

    + +

    Table of contents

    + + + +

    Data types for System/390 architecture

    + +

    Sleef_SLEEF_VECTOR_FLOAT_2

    + +

    Description

    + +

    +Sleef_SLEEF_VECTOR_FLOAT_2 is a data type for storing two __vector float values, +which is defined in sleef.h as follows: +

    + +
    typedef struct {
    +  __vector float x, y;
    +} Sleef_SLEEF_VECTOR_FLOAT_2;
    +
    + +
    + +

    Sleef_SLEEF_VECTOR_DOUBLE_2

    + +

    Description

    + +

    +Sleef_SLEEF_VECTOR_DOUBLE_2 is a data type for storing two __vector double values, +which is defined in sleef.h as follows: +

    + +
    typedef struct {
    +  __vector double x, y;
    +} Sleef_SLEEF_VECTOR_DOUBLE_2;
    +
    + +

    Trigonometric Functions

    + +

    Vectorized double precision sine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_sind1_u10purec(double a);
    +double Sleef_sind1_u10purecfma(double a);
    +double Sleef_cinz_sind1_u10purec(double a);
    +double Sleef_finz_sind1_u10purecfma(double a);
    +
    +__vector double Sleef_sind2_u10(__vector double a);
    +__vector double Sleef_sind2_u10zvector2(__vector double a);
    +__vector double Sleef_sind2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sind2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sind2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sin_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision sine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_sinf1_u10purec(float a);
    +float Sleef_sinf1_u10purecfma(float a);
    +float Sleef_cinz_sinf1_u10purec(float a);
    +float Sleef_finz_sinf1_u10purecfma(float a);
    +
    +__vector float Sleef_sinf4_u10(__vector float a);
    +__vector float Sleef_sinf4_u10zvector2(__vector float a);
    +__vector float Sleef_sinf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sinf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sinf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_sind1_u35purec(double a);
    +double Sleef_sind1_u35purecfma(double a);
    +double Sleef_cinz_sind1_u35purec(double a);
    +double Sleef_finz_sind1_u35purecfma(double a);
    +
    +__vector double Sleef_sind2_u35(__vector double a);
    +__vector double Sleef_sind2_u35zvector2(__vector double a);
    +__vector double Sleef_sind2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sind2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sind2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sin_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_sinf1_u35purec(float a);
    +float Sleef_sinf1_u35purecfma(float a);
    +float Sleef_cinz_sinf1_u35purec(float a);
    +float Sleef_finz_sinf1_u35purecfma(float a);
    +
    +__vector float Sleef_sinf4_u35(__vector float a);
    +__vector float Sleef_sinf4_u35zvector2(__vector float a);
    +__vector float Sleef_sinf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sinf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sinf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_cosd1_u10purec(double a);
    +double Sleef_cosd1_u10purecfma(double a);
    +double Sleef_cinz_cosd1_u10purec(double a);
    +double Sleef_finz_cosd1_u10purecfma(double a);
    +
    +__vector double Sleef_cosd2_u10(__vector double a);
    +__vector double Sleef_cosd2_u10zvector2(__vector double a);
    +__vector double Sleef_cosd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_cosd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_cosd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cos_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_cosf1_u10purec(float a);
    +float Sleef_cosf1_u10purecfma(float a);
    +float Sleef_cinz_cosf1_u10purec(float a);
    +float Sleef_finz_cosf1_u10purecfma(float a);
    +
    +__vector float Sleef_cosf4_u10(__vector float a);
    +__vector float Sleef_cosf4_u10zvector2(__vector float a);
    +__vector float Sleef_cosf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_cosf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_cosf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_cosd1_u35purec(double a);
    +double Sleef_cosd1_u35purecfma(double a);
    +double Sleef_cinz_cosd1_u35purec(double a);
    +double Sleef_finz_cosd1_u35purecfma(double a);
    +
    +__vector double Sleef_cosd2_u35(__vector double a);
    +__vector double Sleef_cosd2_u35zvector2(__vector double a);
    +__vector double Sleef_cosd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_cosd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_cosd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cos_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_cosf1_u35purec(float a);
    +float Sleef_cosf1_u35purecfma(float a);
    +float Sleef_cinz_cosf1_u35purec(float a);
    +float Sleef_finz_cosf1_u35purecfma(float a);
    +
    +__vector float Sleef_cosf4_u35(__vector float a);
    +__vector float Sleef_cosf4_u35zvector2(__vector float a);
    +__vector float Sleef_cosf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_cosf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_cosf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_double2 Sleef_sincosd1_u10purec(double a);
    +Sleef_double2 Sleef_sincosd1_u10purecfma(double a);
    +Sleef_double2 Sleef_cinz_sincosd1_u10purec(double a);
    +Sleef_double2 Sleef_finz_sincosd1_u10purecfma(double a);
    +
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10zvector2(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincosd2_u10zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincosd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_float2 Sleef_sincosf1_u10purec(float a);
    +Sleef_float2 Sleef_sincosf1_u10purecfma(float a);
    +Sleef_float2 Sleef_cinz_sincosf1_u10purec(float a);
    +Sleef_float2 Sleef_finz_sincosf1_u10purecfma(float a);
    +
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10zvector2(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincosf4_u10zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincosf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +Sleef_double2 Sleef_sincosd1_u35purec(double a);
    +Sleef_double2 Sleef_sincosd1_u35purecfma(double a);
    +Sleef_double2 Sleef_cinz_sincosd1_u35purec(double a);
    +Sleef_double2 Sleef_finz_sincosd1_u35purecfma(double a);
    +
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35zvector2(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincosd2_u35zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincosd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +Sleef_float2 Sleef_sincosf1_u35purec(float a);
    +Sleef_float2 Sleef_sincosf1_u35purecfma(float a);
    +Sleef_float2 Sleef_cinz_sincosf1_u35purec(float a);
    +Sleef_float2 Sleef_finz_sincosf1_u35purecfma(float a);
    +
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35zvector2(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincosf4_u35zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincosf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision sine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_sinpid1_u05purec(double a);
    +double Sleef_sinpid1_u05purecfma(double a);
    +double Sleef_cinz_sinpid1_u05purec(double a);
    +double Sleef_finz_sinpid1_u05purecfma(double a);
    +
    +__vector double Sleef_sinpid2_u05(__vector double a);
    +__vector double Sleef_sinpid2_u05zvector2(__vector double a);
    +__vector double Sleef_sinpid2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sinpid2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sinpid2_u05zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification. +

    + +
    +

    Vectorized single precision sine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_sinpif1_u05purec(float a);
    +float Sleef_sinpif1_u05purecfma(float a);
    +float Sleef_cinz_sinpif1_u05purec(float a);
    +float Sleef_finz_sinpif1_u05purecfma(float a);
    +
    +__vector float Sleef_sinpif4_u05(__vector float a);
    +__vector float Sleef_sinpif4_u05zvector2(__vector float a);
    +__vector float Sleef_sinpif4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sinpif4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sinpif4_u05zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification. +

    + +
    +

    Vectorized double precision cosine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_cospid1_u05purec(double a);
    +double Sleef_cospid1_u05purecfma(double a);
    +double Sleef_cinz_cospid1_u05purec(double a);
    +double Sleef_finz_cospid1_u05purecfma(double a);
    +
    +__vector double Sleef_cospid2_u05(__vector double a);
    +__vector double Sleef_cospid2_u05zvector2(__vector double a);
    +__vector double Sleef_cospid2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_cospid2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_finz_cospid2_u05zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification. +

    + +
    +

    Vectorized single precision cosine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_cospif1_u05purec(float a);
    +float Sleef_cospif1_u05purecfma(float a);
    +float Sleef_cinz_cospif1_u05purec(float a);
    +float Sleef_finz_cospif1_u05purecfma(float a);
    +
    +__vector float Sleef_cospif4_u05(__vector float a);
    +__vector float Sleef_cospif4_u05zvector2(__vector float a);
    +__vector float Sleef_cospif4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_cospif4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_finz_cospif4_u05zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification. +

    + +
    +

    Vectorized double precision combined sine and cosine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_double2 Sleef_sincospid1_u05purec(double a);
    +Sleef_double2 Sleef_sincospid1_u05purecfma(double a);
    +Sleef_double2 Sleef_cinz_sincospid1_u05purec(double a);
    +Sleef_double2 Sleef_finz_sincospid1_u05purecfma(double a);
    +
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05zvector2(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincospid2_u05zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincospid2_u05zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification. +

    + +
    +

    Vectorized single precision combined sine and cosine function of πa with 0.506 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_float2 Sleef_sincospif1_u05purec(float a);
    +Sleef_float2 Sleef_sincospif1_u05purecfma(float a);
    +Sleef_float2 Sleef_cinz_sincospif1_u05purec(float a);
    +Sleef_float2 Sleef_finz_sincospif1_u05purecfma(float a);
    +
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05zvector2(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincospif4_u05zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincospif4_u05zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification. +

    + +
    +

    Vectorized double precision combined sine and cosine function of πa with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +Sleef_double2 Sleef_sincospid1_u35purec(double a);
    +Sleef_double2 Sleef_sincospid1_u35purecfma(double a);
    +Sleef_double2 Sleef_cinz_sincospid1_u35purec(double a);
    +Sleef_double2 Sleef_finz_sincospid1_u35purecfma(double a);
    +
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35zvector2(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincospid2_u35zvector2nofma(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincospid2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision combined sine and cosine function of πa with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +Sleef_float2 Sleef_sincospif1_u35purec(float a);
    +Sleef_float2 Sleef_sincospif1_u35purecfma(float a);
    +Sleef_float2 Sleef_cinz_sincospif1_u35purec(float a);
    +Sleef_float2 Sleef_finz_sincospif1_u35purecfma(float a);
    +
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35zvector2(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincospif4_u35zvector2nofma(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincospif4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision tangent function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_tand1_u10purec(double a);
    +double Sleef_tand1_u10purecfma(double a);
    +double Sleef_cinz_tand1_u10purec(double a);
    +double Sleef_finz_tand1_u10purecfma(double a);
    +
    +__vector double Sleef_tand2_u10(__vector double a);
    +__vector double Sleef_tand2_u10zvector2(__vector double a);
    +__vector double Sleef_tand2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_tand2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_tand2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tan_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision tangent function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_tanf1_u10purec(float a);
    +float Sleef_tanf1_u10purecfma(float a);
    +float Sleef_cinz_tanf1_u10purec(float a);
    +float Sleef_finz_tanf1_u10purecfma(float a);
    +
    +__vector float Sleef_tanf4_u10(__vector float a);
    +__vector float Sleef_tanf4_u10zvector2(__vector float a);
    +__vector float Sleef_tanf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_tanf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_tanf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_tand1_u35purec(double a);
    +double Sleef_tand1_u35purecfma(double a);
    +double Sleef_cinz_tand1_u35purec(double a);
    +double Sleef_finz_tand1_u35purecfma(double a);
    +
    +__vector double Sleef_tand2_u35(__vector double a);
    +__vector double Sleef_tand2_u35zvector2(__vector double a);
    +__vector double Sleef_tand2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_tand2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_tand2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tan_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_tanf1_u35purec(float a);
    +float Sleef_tanf1_u35purecfma(float a);
    +float Sleef_cinz_tanf1_u35purec(float a);
    +float Sleef_finz_tanf1_u35purecfma(float a);
    +
    +__vector float Sleef_tanf4_u35(__vector float a);
    +__vector float Sleef_tanf4_u35zvector2(__vector float a);
    +__vector float Sleef_tanf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_tanf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_tanf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification. +

    + +

    Power, exponential, and logarithmic functions

    + +

    Vectorized double precision power function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_powd1_u10purec(double a, double b);
    +double Sleef_powd1_u10purecfma(double a, double b);
    +double Sleef_cinz_powd1_u10purec(double a, double b);
    +double Sleef_finz_powd1_u10purecfma(double a, double b);
    +
    +__vector double Sleef_powd2_u10(__vector double a, __vector double b);
    +__vector double Sleef_powd2_u10zvector2(__vector double a, __vector double b);
    +__vector double Sleef_powd2_u10zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_cinz_powd2_u10zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_finz_powd2_u10zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_pow_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision power function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_powf1_u10purec(float a, float b);
    +float Sleef_powf1_u10purecfma(float a, float b);
    +float Sleef_cinz_powf1_u10purec(float a, float b);
    +float Sleef_finz_powf1_u10purecfma(float a, float b);
    +
    +__vector float Sleef_powf4_u10(__vector float a, __vector float b);
    +__vector float Sleef_powf4_u10zvector2(__vector float a, __vector float b);
    +__vector float Sleef_powf4_u10zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_cinz_powf4_u10zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_finz_powf4_u10zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_powf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision natural logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_logd1_u10purec(double a);
    +double Sleef_logd1_u10purecfma(double a);
    +double Sleef_cinz_logd1_u10purec(double a);
    +double Sleef_finz_logd1_u10purecfma(double a);
    +
    +__vector double Sleef_logd2_u10(__vector double a);
    +__vector double Sleef_logd2_u10zvector2(__vector double a);
    +__vector double Sleef_logd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_logd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_logd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision natural logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_logf1_u10purec(float a);
    +float Sleef_logf1_u10purecfma(float a);
    +float Sleef_cinz_logf1_u10purec(float a);
    +float Sleef_finz_logf1_u10purecfma(float a);
    +
    +__vector float Sleef_logf4_u10(__vector float a);
    +__vector float Sleef_logf4_u10zvector2(__vector float a);
    +__vector float Sleef_logf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_logf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_logf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_logf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision natural logarithmic function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_logd1_u35purec(double a);
    +double Sleef_logd1_u35purecfma(double a);
    +double Sleef_cinz_logd1_u35purec(double a);
    +double Sleef_finz_logd1_u35purecfma(double a);
    +
    +__vector double Sleef_logd2_u35(__vector double a);
    +__vector double Sleef_logd2_u35zvector2(__vector double a);
    +__vector double Sleef_logd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_logd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_logd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision natural logarithmic function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_logf1_u35purec(float a);
    +float Sleef_logf1_u35purecfma(float a);
    +float Sleef_cinz_logf1_u35purec(float a);
    +float Sleef_finz_logf1_u35purecfma(float a);
    +
    +__vector float Sleef_logf4_u35(__vector float a);
    +__vector float Sleef_logf4_u35zvector2(__vector float a);
    +__vector float Sleef_logf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_logf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_logf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_logf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_log10d1_u10purec(double a);
    +double Sleef_log10d1_u10purecfma(double a);
    +double Sleef_cinz_log10d1_u10purec(double a);
    +double Sleef_finz_log10d1_u10purecfma(double a);
    +
    +__vector double Sleef_log10d2_u10(__vector double a);
    +__vector double Sleef_log10d2_u10zvector2(__vector double a);
    +__vector double Sleef_log10d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_log10d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_log10d2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log10_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_log10f1_u10purec(float a);
    +float Sleef_log10f1_u10purecfma(float a);
    +float Sleef_cinz_log10f1_u10purec(float a);
    +float Sleef_finz_log10f1_u10purecfma(float a);
    +
    +__vector float Sleef_log10f4_u10(__vector float a);
    +__vector float Sleef_log10f4_u10zvector2(__vector float a);
    +__vector float Sleef_log10f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_log10f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_log10f4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_log2d1_u10purec(double a);
    +double Sleef_log2d1_u10purecfma(double a);
    +double Sleef_cinz_log2d1_u10purec(double a);
    +double Sleef_finz_log2d1_u10purecfma(double a);
    +
    +__vector double Sleef_log2d2_u10(__vector double a);
    +__vector double Sleef_log2d2_u10zvector2(__vector double a);
    +__vector double Sleef_log2d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_log2d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_log2d2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log2_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_log2f1_u10purec(float a);
    +float Sleef_log2f1_u10purecfma(float a);
    +float Sleef_cinz_log2f1_u10purec(float a);
    +float Sleef_finz_log2f1_u10purecfma(float a);
    +
    +__vector float Sleef_log2f4_u10(__vector float a);
    +__vector float Sleef_log2f4_u10zvector2(__vector float a);
    +__vector float Sleef_log2f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_log2f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_log2f4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_log1pd1_u10purec(double a);
    +double Sleef_log1pd1_u10purecfma(double a);
    +double Sleef_cinz_log1pd1_u10purec(double a);
    +double Sleef_finz_log1pd1_u10purecfma(double a);
    +
    +__vector double Sleef_log1pd2_u10(__vector double a);
    +__vector double Sleef_log1pd2_u10zvector2(__vector double a);
    +__vector double Sleef_log1pd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_log1pd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_log1pd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_log1pf1_u10purec(float a);
    +float Sleef_log1pf1_u10purecfma(float a);
    +float Sleef_cinz_log1pf1_u10purec(float a);
    +float Sleef_finz_log1pf1_u10purecfma(float a);
    +
    +__vector float Sleef_log1pf4_u10(__vector float a);
    +__vector float Sleef_log1pf4_u10zvector2(__vector float a);
    +__vector float Sleef_log1pf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_log1pf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_log1pf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-e exponential function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_expd1_u10purec(double a);
    +double Sleef_expd1_u10purecfma(double a);
    +double Sleef_cinz_expd1_u10purec(double a);
    +double Sleef_finz_expd1_u10purecfma(double a);
    +
    +__vector double Sleef_expd2_u10(__vector double a);
    +__vector double Sleef_expd2_u10zvector2(__vector double a);
    +__vector double Sleef_expd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_expd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_expd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_exp_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-e exponential function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_expf1_u10purec(float a);
    +float Sleef_expf1_u10purecfma(float a);
    +float Sleef_cinz_expf1_u10purec(float a);
    +float Sleef_finz_expf1_u10purecfma(float a);
    +
    +__vector float Sleef_expf4_u10(__vector float a);
    +__vector float Sleef_expf4_u10zvector2(__vector float a);
    +__vector float Sleef_expf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_expf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_expf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_expf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-2 exponential function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_exp2d1_u10purec(double a);
    +double Sleef_exp2d1_u10purecfma(double a);
    +double Sleef_cinz_exp2d1_u10purec(double a);
    +double Sleef_finz_exp2d1_u10purecfma(double a);
    +
    +__vector double Sleef_exp2d2_u10(__vector double a);
    +__vector double Sleef_exp2d2_u10zvector2(__vector double a);
    +__vector double Sleef_exp2d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_exp2d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_exp2d2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-2 exponential function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_exp2f1_u10purec(float a);
    +float Sleef_exp2f1_u10purecfma(float a);
    +float Sleef_cinz_exp2f1_u10purec(float a);
    +float Sleef_finz_exp2f1_u10purecfma(float a);
    +
    +__vector float Sleef_exp2f4_u10(__vector float a);
    +__vector float Sleef_exp2f4_u10zvector2(__vector float a);
    +__vector float Sleef_exp2f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_exp2f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_exp2f4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-10 exponential function with 1.09 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_exp10d1_u10purec(double a);
    +double Sleef_exp10d1_u10purecfma(double a);
    +double Sleef_cinz_exp10d1_u10purec(double a);
    +double Sleef_finz_exp10d1_u10purecfma(double a);
    +
    +__vector double Sleef_exp10d2_u10(__vector double a);
    +__vector double Sleef_exp10d2_u10zvector2(__vector double a);
    +__vector double Sleef_exp10d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_exp10d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_exp10d2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-10 exponential function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_exp10f1_u10purec(float a);
    +float Sleef_exp10f1_u10purecfma(float a);
    +float Sleef_cinz_exp10f1_u10purec(float a);
    +float Sleef_finz_exp10f1_u10purecfma(float a);
    +
    +__vector float Sleef_exp10f4_u10(__vector float a);
    +__vector float Sleef_exp10f4_u10zvector2(__vector float a);
    +__vector float Sleef_exp10f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_exp10f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_exp10f4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_expm1d1_u10purec(double a);
    +double Sleef_expm1d1_u10purecfma(double a);
    +double Sleef_cinz_expm1d1_u10purec(double a);
    +double Sleef_finz_expm1d1_u10purecfma(double a);
    +
    +__vector double Sleef_expm1d2_u10(__vector double a);
    +__vector double Sleef_expm1d2_u10zvector2(__vector double a);
    +__vector double Sleef_expm1d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_expm1d2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_expm1d2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_expm1f1_u10purec(float a);
    +float Sleef_expm1f1_u10purecfma(float a);
    +float Sleef_cinz_expm1f1_u10purec(float a);
    +float Sleef_finz_expm1f1_u10purecfma(float a);
    +
    +__vector float Sleef_expm1f4_u10(__vector float a);
    +__vector float Sleef_expm1f4_u10zvector2(__vector float a);
    +__vector float Sleef_expm1f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_expm1f4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_expm1f4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision square root function with 0.5001 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_sqrtd1_u05purec(double a);
    +double Sleef_sqrtd1_u05purecfma(double a);
    +double Sleef_cinz_sqrtd1_u05purec(double a);
    +double Sleef_finz_sqrtd1_u05purecfma(double a);
    +
    +__vector double Sleef_sqrtd2_u05(__vector double a);
    +__vector double Sleef_sqrtd2_u05zvector2(__vector double a);
    +__vector double Sleef_sqrtd2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sqrtd2_u05zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sqrtd2_u05zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification. +

    + +
    +

    Vectorized single precision square root function with 0.5001 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_sqrtf1_u05purec(float a);
    +float Sleef_sqrtf1_u05purecfma(float a);
    +float Sleef_cinz_sqrtf1_u05purec(float a);
    +float Sleef_finz_sqrtf1_u05purecfma(float a);
    +
    +__vector float Sleef_sqrtf4_u05(__vector float a);
    +__vector float Sleef_sqrtf4_u05zvector2(__vector float a);
    +__vector float Sleef_sqrtf4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sqrtf4_u05zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sqrtf4_u05zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification. +

    + +
    +

    Vectorized double precision square root function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_sqrtd1_u35purec(double a);
    +double Sleef_sqrtd1_u35purecfma(double a);
    +double Sleef_cinz_sqrtd1_u35purec(double a);
    +double Sleef_finz_sqrtd1_u35purecfma(double a);
    +
    +__vector double Sleef_sqrtd2_u35(__vector double a);
    +__vector double Sleef_sqrtd2_u35zvector2(__vector double a);
    +__vector double Sleef_sqrtd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sqrtd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sqrtd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision square root function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_sqrtf1_u35purec(float a);
    +float Sleef_sqrtf1_u35purecfma(float a);
    +float Sleef_cinz_sqrtf1_u35purec(float a);
    +float Sleef_finz_sqrtf1_u35purecfma(float a);
    +
    +__vector float Sleef_sqrtf4_u35(__vector float a);
    +__vector float Sleef_sqrtf4_u35zvector2(__vector float a);
    +__vector float Sleef_sqrtf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sqrtf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sqrtf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision cubic root function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_cbrtd1_u10purec(double a);
    +double Sleef_cbrtd1_u10purecfma(double a);
    +double Sleef_cinz_cbrtd1_u10purec(double a);
    +double Sleef_finz_cbrtd1_u10purecfma(double a);
    +
    +__vector double Sleef_cbrtd2_u10(__vector double a);
    +__vector double Sleef_cbrtd2_u10zvector2(__vector double a);
    +__vector double Sleef_cbrtd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_cbrtd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_cbrtd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision cubic root function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_cbrtf1_u10purec(float a);
    +float Sleef_cbrtf1_u10purecfma(float a);
    +float Sleef_cinz_cbrtf1_u10purec(float a);
    +float Sleef_finz_cbrtf1_u10purecfma(float a);
    +
    +__vector float Sleef_cbrtf4_u10(__vector float a);
    +__vector float Sleef_cbrtf4_u10zvector2(__vector float a);
    +__vector float Sleef_cbrtf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_cbrtf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_cbrtf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision cubic root function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_cbrtd1_u35purec(double a);
    +double Sleef_cbrtd1_u35purecfma(double a);
    +double Sleef_cinz_cbrtd1_u35purec(double a);
    +double Sleef_finz_cbrtd1_u35purecfma(double a);
    +
    +__vector double Sleef_cbrtd2_u35(__vector double a);
    +__vector double Sleef_cbrtd2_u35zvector2(__vector double a);
    +__vector double Sleef_cbrtd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_cbrtd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_cbrtd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision cubic root function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_cbrtf1_u35purec(float a);
    +float Sleef_cbrtf1_u35purecfma(float a);
    +float Sleef_cinz_cbrtf1_u35purec(float a);
    +float Sleef_finz_cbrtf1_u35purecfma(float a);
    +
    +__vector float Sleef_cbrtf4_u35(__vector float a);
    +__vector float Sleef_cbrtf4_u35zvector2(__vector float a);
    +__vector float Sleef_cbrtf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_cbrtf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_cbrtf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_hypotd1_u05purec(double a, double b);
    +double Sleef_hypotd1_u05purecfma(double a, double b);
    +double Sleef_cinz_hypotd1_u05purec(double a, double b);
    +double Sleef_finz_hypotd1_u05purecfma(double a, double b);
    +
    +__vector double Sleef_hypotd2_u05(__vector double a, __vector double b);
    +__vector double Sleef_hypotd2_u05zvector2(__vector double a, __vector double b);
    +__vector double Sleef_hypotd2_u05zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_cinz_hypotd2_u05zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_finz_hypotd2_u05zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification. +

    + +
    +

    Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_hypotf1_u05purec(float a, float b);
    +float Sleef_hypotf1_u05purecfma(float a, float b);
    +float Sleef_cinz_hypotf1_u05purec(float a, float b);
    +float Sleef_finz_hypotf1_u05purecfma(float a, float b);
    +
    +__vector float Sleef_hypotf4_u05(__vector float a, __vector float b);
    +__vector float Sleef_hypotf4_u05zvector2(__vector float a, __vector float b);
    +__vector float Sleef_hypotf4_u05zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_cinz_hypotf4_u05zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_finz_hypotf4_u05zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification. +

    + +
    +

    Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_hypotd1_u35purec(double a, double b);
    +double Sleef_hypotd1_u35purecfma(double a, double b);
    +double Sleef_cinz_hypotd1_u35purec(double a, double b);
    +double Sleef_finz_hypotd1_u35purecfma(double a, double b);
    +
    +__vector double Sleef_hypotd2_u35(__vector double a, __vector double b);
    +__vector double Sleef_hypotd2_u35zvector2(__vector double a, __vector double b);
    +__vector double Sleef_hypotd2_u35zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_cinz_hypotd2_u35zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_finz_hypotd2_u35zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_hypotf1_u35purec(float a, float b);
    +float Sleef_hypotf1_u35purecfma(float a, float b);
    +float Sleef_cinz_hypotf1_u35purec(float a, float b);
    +float Sleef_finz_hypotf1_u35purecfma(float a, float b);
    +
    +__vector float Sleef_hypotf4_u35(__vector float a, __vector float b);
    +__vector float Sleef_hypotf4_u35zvector2(__vector float a, __vector float b);
    +__vector float Sleef_hypotf4_u35zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_cinz_hypotf4_u35zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_finz_hypotf4_u35zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification. +

    + + +

    Inverse Trigonometric Functions

    + +

    Vectorized double precision arc sine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_asind1_u10purec(double a);
    +double Sleef_asind1_u10purecfma(double a);
    +double Sleef_cinz_asind1_u10purec(double a);
    +double Sleef_finz_asind1_u10purecfma(double a);
    +
    +__vector double Sleef_asind2_u10(__vector double a);
    +__vector double Sleef_asind2_u10zvector2(__vector double a);
    +__vector double Sleef_asind2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_asind2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_asind2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asin_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc sine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_asinf1_u10purec(float a);
    +float Sleef_asinf1_u10purecfma(float a);
    +float Sleef_cinz_asinf1_u10purec(float a);
    +float Sleef_finz_asinf1_u10purecfma(float a);
    +
    +__vector float Sleef_asinf4_u10(__vector float a);
    +__vector float Sleef_asinf4_u10zvector2(__vector float a);
    +__vector float Sleef_asinf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_asinf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_asinf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_asind1_u35purec(double a);
    +double Sleef_asind1_u35purecfma(double a);
    +double Sleef_cinz_asind1_u35purec(double a);
    +double Sleef_finz_asind1_u35purecfma(double a);
    +
    +__vector double Sleef_asind2_u35(__vector double a);
    +__vector double Sleef_asind2_u35zvector2(__vector double a);
    +__vector double Sleef_asind2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_asind2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_asind2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asin_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_asinf1_u35purec(float a);
    +float Sleef_asinf1_u35purecfma(float a);
    +float Sleef_cinz_asinf1_u35purec(float a);
    +float Sleef_finz_asinf1_u35purecfma(float a);
    +
    +__vector float Sleef_asinf4_u35(__vector float a);
    +__vector float Sleef_asinf4_u35zvector2(__vector float a);
    +__vector float Sleef_asinf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_asinf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_asinf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_acosd1_u10purec(double a);
    +double Sleef_acosd1_u10purecfma(double a);
    +double Sleef_cinz_acosd1_u10purec(double a);
    +double Sleef_finz_acosd1_u10purecfma(double a);
    +
    +__vector double Sleef_acosd2_u10(__vector double a);
    +__vector double Sleef_acosd2_u10zvector2(__vector double a);
    +__vector double Sleef_acosd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_acosd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_acosd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acos_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc cosine function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_acosf1_u10purec(float a);
    +float Sleef_acosf1_u10purecfma(float a);
    +float Sleef_cinz_acosf1_u10purec(float a);
    +float Sleef_finz_acosf1_u10purecfma(float a);
    +
    +__vector float Sleef_acosf4_u10(__vector float a);
    +__vector float Sleef_acosf4_u10zvector2(__vector float a);
    +__vector float Sleef_acosf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_acosf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_acosf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_acosd1_u35purec(double a);
    +double Sleef_acosd1_u35purecfma(double a);
    +double Sleef_cinz_acosd1_u35purec(double a);
    +double Sleef_finz_acosd1_u35purecfma(double a);
    +
    +__vector double Sleef_acosd2_u35(__vector double a);
    +__vector double Sleef_acosd2_u35zvector2(__vector double a);
    +__vector double Sleef_acosd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_acosd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_acosd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acos_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_acosf1_u35purec(float a);
    +float Sleef_acosf1_u35purecfma(float a);
    +float Sleef_cinz_acosf1_u35purec(float a);
    +float Sleef_finz_acosf1_u35purecfma(float a);
    +
    +__vector float Sleef_acosf4_u35(__vector float a);
    +__vector float Sleef_acosf4_u35zvector2(__vector float a);
    +__vector float Sleef_acosf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_acosf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_acosf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc tangent function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_atand1_u10purec(double a);
    +double Sleef_atand1_u10purecfma(double a);
    +double Sleef_cinz_atand1_u10purec(double a);
    +double Sleef_finz_atand1_u10purecfma(double a);
    +
    +__vector double Sleef_atand2_u10(__vector double a);
    +__vector double Sleef_atand2_u10zvector2(__vector double a);
    +__vector double Sleef_atand2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_atand2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_atand2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc tangent function with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_atanf1_u10purec(float a);
    +float Sleef_atanf1_u10purecfma(float a);
    +float Sleef_cinz_atanf1_u10purec(float a);
    +float Sleef_finz_atanf1_u10purecfma(float a);
    +
    +__vector float Sleef_atanf4_u10(__vector float a);
    +__vector float Sleef_atanf4_u10zvector2(__vector float a);
    +__vector float Sleef_atanf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_atanf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_atanf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_atand1_u35purec(double a);
    +double Sleef_atand1_u35purecfma(double a);
    +double Sleef_cinz_atand1_u35purec(double a);
    +double Sleef_finz_atand1_u35purecfma(double a);
    +
    +__vector double Sleef_atand2_u35(__vector double a);
    +__vector double Sleef_atand2_u35zvector2(__vector double a);
    +__vector double Sleef_atand2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_atand2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_atand2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_atanf1_u35purec(float a);
    +float Sleef_atanf1_u35purecfma(float a);
    +float Sleef_cinz_atanf1_u35purec(float a);
    +float Sleef_finz_atanf1_u35purecfma(float a);
    +
    +__vector float Sleef_atanf4_u35(__vector float a);
    +__vector float Sleef_atanf4_u35zvector2(__vector float a);
    +__vector float Sleef_atanf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_atanf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_atanf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_atan2d1_u10purec(double a, double b);
    +double Sleef_atan2d1_u10purecfma(double a, double b);
    +double Sleef_cinz_atan2d1_u10purec(double a, double b);
    +double Sleef_finz_atan2d1_u10purecfma(double a, double b);
    +
    +__vector double Sleef_atan2d2_u10(__vector double a, __vector double b);
    +__vector double Sleef_atan2d2_u10zvector2(__vector double a, __vector double b);
    +__vector double Sleef_atan2d2_u10zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_cinz_atan2d2_u10zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_finz_atan2d2_u10zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_atan2f1_u10purec(float a, float b);
    +float Sleef_atan2f1_u10purecfma(float a, float b);
    +float Sleef_cinz_atan2f1_u10purec(float a, float b);
    +float Sleef_finz_atan2f1_u10purecfma(float a, float b);
    +
    +__vector float Sleef_atan2f4_u10(__vector float a, __vector float b);
    +__vector float Sleef_atan2f4_u10zvector2(__vector float a, __vector float b);
    +__vector float Sleef_atan2f4_u10zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_cinz_atan2f4_u10zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_finz_atan2f4_u10zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_atan2d1_u35purec(double a, double b);
    +double Sleef_atan2d1_u35purecfma(double a, double b);
    +double Sleef_cinz_atan2d1_u35purec(double a, double b);
    +double Sleef_finz_atan2d1_u35purecfma(double a, double b);
    +
    +__vector double Sleef_atan2d2_u35(__vector double a, __vector double b);
    +__vector double Sleef_atan2d2_u35zvector2(__vector double a, __vector double b);
    +__vector double Sleef_atan2d2_u35zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_cinz_atan2d2_u35zvector2nofma(__vector double a, __vector double b);
    +__vector double Sleef_finz_atan2d2_u35zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_atan2f1_u35purec(float a, float b);
    +float Sleef_atan2f1_u35purecfma(float a, float b);
    +float Sleef_cinz_atan2f1_u35purec(float a, float b);
    +float Sleef_finz_atan2f1_u35purecfma(float a, float b);
    +
    +__vector float Sleef_atan2f4_u35(__vector float a, __vector float b);
    +__vector float Sleef_atan2f4_u35zvector2(__vector float a, __vector float b);
    +__vector float Sleef_atan2f4_u35zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_cinz_atan2f4_u35zvector2nofma(__vector float a, __vector float b);
    +__vector float Sleef_finz_atan2f4_u35zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification. +

    + + + +

    Hyperbolic function and inverse hyperbolic function

    + +

    Vectorized double precision hyperbolic sine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_sinhd1_u10purec(double a);
    +double Sleef_sinhd1_u10purecfma(double a);
    +double Sleef_cinz_sinhd1_u10purec(double a);
    +double Sleef_finz_sinhd1_u10purecfma(double a);
    +
    +__vector double Sleef_sinhd2_u10(__vector double a);
    +__vector double Sleef_sinhd2_u10zvector2(__vector double a);
    +__vector double Sleef_sinhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sinhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sinhd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic sine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_sinhf1_u10purec(float a);
    +float Sleef_sinhf1_u10purecfma(float a);
    +float Sleef_cinz_sinhf1_u10purec(float a);
    +float Sleef_finz_sinhf1_u10purecfma(float a);
    +
    +__vector float Sleef_sinhf4_u10(__vector float a);
    +__vector float Sleef_sinhf4_u10zvector2(__vector float a);
    +__vector float Sleef_sinhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sinhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sinhf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_sinhd1_u35purec(double a);
    +double Sleef_sinhd1_u35purecfma(double a);
    +double Sleef_cinz_sinhd1_u35purec(double a);
    +double Sleef_finz_sinhd1_u35purecfma(double a);
    +
    +__vector double Sleef_sinhd2_u35(__vector double a);
    +__vector double Sleef_sinhd2_u35zvector2(__vector double a);
    +__vector double Sleef_sinhd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_sinhd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_sinhd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_sinhf1_u35purec(float a);
    +float Sleef_sinhf1_u35purecfma(float a);
    +float Sleef_cinz_sinhf1_u35purec(float a);
    +float Sleef_finz_sinhf1_u35purecfma(float a);
    +
    +__vector float Sleef_sinhf4_u35(__vector float a);
    +__vector float Sleef_sinhf4_u35zvector2(__vector float a);
    +__vector float Sleef_sinhf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_sinhf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_sinhf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision hyperbolic cosine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_coshd1_u10purec(double a);
    +double Sleef_coshd1_u10purecfma(double a);
    +double Sleef_cinz_coshd1_u10purec(double a);
    +double Sleef_finz_coshd1_u10purecfma(double a);
    +
    +__vector double Sleef_coshd2_u10(__vector double a);
    +__vector double Sleef_coshd2_u10zvector2(__vector double a);
    +__vector double Sleef_coshd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_coshd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_coshd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic cosine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_coshf1_u10purec(float a);
    +float Sleef_coshf1_u10purecfma(float a);
    +float Sleef_cinz_coshf1_u10purec(float a);
    +float Sleef_finz_coshf1_u10purecfma(float a);
    +
    +__vector float Sleef_coshf4_u10(__vector float a);
    +__vector float Sleef_coshf4_u10zvector2(__vector float a);
    +__vector float Sleef_coshf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_coshf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_coshf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision hyperbolic cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_coshd1_u35purec(double a);
    +double Sleef_coshd1_u35purecfma(double a);
    +double Sleef_cinz_coshd1_u35purec(double a);
    +double Sleef_finz_coshd1_u35purecfma(double a);
    +
    +__vector double Sleef_coshd2_u35(__vector double a);
    +__vector double Sleef_coshd2_u35zvector2(__vector double a);
    +__vector double Sleef_coshd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_coshd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_coshd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_coshf1_u35purec(float a);
    +float Sleef_coshf1_u35purecfma(float a);
    +float Sleef_cinz_coshf1_u35purec(float a);
    +float Sleef_finz_coshf1_u35purecfma(float a);
    +
    +__vector float Sleef_coshf4_u35(__vector float a);
    +__vector float Sleef_coshf4_u35zvector2(__vector float a);
    +__vector float Sleef_coshf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_coshf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_coshf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision hyperbolic tangent function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_tanhd1_u10purec(double a);
    +double Sleef_tanhd1_u10purecfma(double a);
    +double Sleef_cinz_tanhd1_u10purec(double a);
    +double Sleef_finz_tanhd1_u10purecfma(double a);
    +
    +__vector double Sleef_tanhd2_u10(__vector double a);
    +__vector double Sleef_tanhd2_u10zvector2(__vector double a);
    +__vector double Sleef_tanhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_tanhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_tanhd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic tangent function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_tanhf1_u10purec(float a);
    +float Sleef_tanhf1_u10purecfma(float a);
    +float Sleef_cinz_tanhf1_u10purec(float a);
    +float Sleef_finz_tanhf1_u10purecfma(float a);
    +
    +__vector float Sleef_tanhf4_u10(__vector float a);
    +__vector float Sleef_tanhf4_u10zvector2(__vector float a);
    +__vector float Sleef_tanhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_tanhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_tanhf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision hyperbolic tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +double Sleef_tanhd1_u35purec(double a);
    +double Sleef_tanhd1_u35purecfma(double a);
    +double Sleef_cinz_tanhd1_u35purec(double a);
    +double Sleef_finz_tanhd1_u35purecfma(double a);
    +
    +__vector double Sleef_tanhd2_u35(__vector double a);
    +__vector double Sleef_tanhd2_u35zvector2(__vector double a);
    +__vector double Sleef_tanhd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_tanhd2_u35zvector2nofma(__vector double a);
    +__vector double Sleef_finz_tanhd2_u35zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification. +

    + +
    +

    Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +
    +float Sleef_tanhf1_u35purec(float a);
    +float Sleef_tanhf1_u35purecfma(float a);
    +float Sleef_cinz_tanhf1_u35purec(float a);
    +float Sleef_finz_tanhf1_u35purecfma(float a);
    +
    +__vector float Sleef_tanhf4_u35(__vector float a);
    +__vector float Sleef_tanhf4_u35zvector2(__vector float a);
    +__vector float Sleef_tanhf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_tanhf4_u35zvector2nofma(__vector float a);
    +__vector float Sleef_finz_tanhf4_u35zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification. +

    + +
    +

    Vectorized double precision inverse hyperbolic sine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_asinhd1_u10purec(double a);
    +double Sleef_asinhd1_u10purecfma(double a);
    +double Sleef_cinz_asinhd1_u10purec(double a);
    +double Sleef_finz_asinhd1_u10purecfma(double a);
    +
    +__vector double Sleef_asinhd2_u10(__vector double a);
    +__vector double Sleef_asinhd2_u10zvector2(__vector double a);
    +__vector double Sleef_asinhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_asinhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_asinhd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision inverse hyperbolic sine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_asinhf1_u10purec(float a);
    +float Sleef_asinhf1_u10purecfma(float a);
    +float Sleef_cinz_asinhf1_u10purec(float a);
    +float Sleef_finz_asinhf1_u10purecfma(float a);
    +
    +__vector float Sleef_asinhf4_u10(__vector float a);
    +__vector float Sleef_asinhf4_u10zvector2(__vector float a);
    +__vector float Sleef_asinhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_asinhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_asinhf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision inverse hyperbolic cosine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_acoshd1_u10purec(double a);
    +double Sleef_acoshd1_u10purecfma(double a);
    +double Sleef_cinz_acoshd1_u10purec(double a);
    +double Sleef_finz_acoshd1_u10purecfma(double a);
    +
    +__vector double Sleef_acoshd2_u10(__vector double a);
    +__vector double Sleef_acoshd2_u10zvector2(__vector double a);
    +__vector double Sleef_acoshd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_acoshd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_acoshd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision inverse hyperbolic cosine function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_acoshf1_u10purec(float a);
    +float Sleef_acoshf1_u10purecfma(float a);
    +float Sleef_cinz_acoshf1_u10purec(float a);
    +float Sleef_finz_acoshf1_u10purecfma(float a);
    +
    +__vector float Sleef_acoshf4_u10(__vector float a);
    +__vector float Sleef_acoshf4_u10zvector2(__vector float a);
    +__vector float Sleef_acoshf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_acoshf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_acoshf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision inverse hyperbolic tangent function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_atanhd1_u10purec(double a);
    +double Sleef_atanhd1_u10purecfma(double a);
    +double Sleef_cinz_atanhd1_u10purec(double a);
    +double Sleef_finz_atanhd1_u10purecfma(double a);
    +
    +__vector double Sleef_atanhd2_u10(__vector double a);
    +__vector double Sleef_atanhd2_u10zvector2(__vector double a);
    +__vector double Sleef_atanhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_atanhd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_atanhd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision inverse hyperbolic tangent function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_atanhf1_u10purec(float a);
    +float Sleef_atanhf1_u10purecfma(float a);
    +float Sleef_cinz_atanhf1_u10purec(float a);
    +float Sleef_finz_atanhf1_u10purecfma(float a);
    +
    +__vector float Sleef_atanhf4_u10(__vector float a);
    +__vector float Sleef_atanhf4_u10zvector2(__vector float a);
    +__vector float Sleef_atanhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_atanhf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_atanhf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification. +

    + + +

    Error and gamma function

    + +

    Vectorized double precision error function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_erfd1_u10purec(double a);
    +double Sleef_erfd1_u10purecfma(double a);
    +double Sleef_cinz_erfd1_u10purec(double a);
    +double Sleef_finz_erfd1_u10purecfma(double a);
    +
    +__vector double Sleef_erfd2_u10(__vector double a);
    +__vector double Sleef_erfd2_u10zvector2(__vector double a);
    +__vector double Sleef_erfd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_erfd2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_erfd2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_erf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision error function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_erff1_u10purec(float a);
    +float Sleef_erff1_u10purecfma(float a);
    +float Sleef_cinz_erff1_u10purec(float a);
    +float Sleef_finz_erff1_u10purecfma(float a);
    +
    +__vector float Sleef_erff4_u10(__vector float a);
    +__vector float Sleef_erff4_u10zvector2(__vector float a);
    +__vector float Sleef_erff4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_erff4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_erff4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_erff_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision complementary error function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_erfcd1_u15purec(double a);
    +double Sleef_erfcd1_u15purecfma(double a);
    +double Sleef_cinz_erfcd1_u15purec(double a);
    +double Sleef_finz_erfcd1_u15purecfma(double a);
    +
    +__vector double Sleef_erfcd2_u15(__vector double a);
    +__vector double Sleef_erfcd2_u15zvector2(__vector double a);
    +__vector double Sleef_erfcd2_u15zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_erfcd2_u15zvector2nofma(__vector double a);
    +__vector double Sleef_finz_erfcd2_u15zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification. +

    + +
    +

    Vectorized single precision complementary error function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_erfcf1_u15purec(float a);
    +float Sleef_erfcf1_u15purecfma(float a);
    +float Sleef_cinz_erfcf1_u15purec(float a);
    +float Sleef_finz_erfcf1_u15purecfma(float a);
    +
    +__vector float Sleef_erfcf4_u15(__vector float a);
    +__vector float Sleef_erfcf4_u15zvector2(__vector float a);
    +__vector float Sleef_erfcf4_u15zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_erfcf4_u15zvector2nofma(__vector float a);
    +__vector float Sleef_finz_erfcf4_u15zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification. +

    + +
    +

    Vectorized double precision gamma function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_tgammad1_u10purec(double a);
    +double Sleef_tgammad1_u10purecfma(double a);
    +double Sleef_cinz_tgammad1_u10purec(double a);
    +double Sleef_finz_tgammad1_u10purecfma(double a);
    +
    +__vector double Sleef_tgammad2_u10(__vector double a);
    +__vector double Sleef_tgammad2_u10zvector2(__vector double a);
    +__vector double Sleef_tgammad2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_tgammad2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_tgammad2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision gamma function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_tgammaf1_u10purec(float a);
    +float Sleef_tgammaf1_u10purecfma(float a);
    +float Sleef_cinz_tgammaf1_u10purec(float a);
    +float Sleef_finz_tgammaf1_u10purecfma(float a);
    +
    +__vector float Sleef_tgammaf4_u10(__vector float a);
    +__vector float Sleef_tgammaf4_u10zvector2(__vector float a);
    +__vector float Sleef_tgammaf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_tgammaf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_tgammaf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification. +

    + +
    +

    Vectorized double precision log gamma function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +double Sleef_lgammad1_u10purec(double a);
    +double Sleef_lgammad1_u10purecfma(double a);
    +double Sleef_cinz_lgammad1_u10purec(double a);
    +double Sleef_finz_lgammad1_u10purecfma(double a);
    +
    +__vector double Sleef_lgammad2_u10(__vector double a);
    +__vector double Sleef_lgammad2_u10zvector2(__vector double a);
    +__vector double Sleef_lgammad2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_cinz_lgammad2_u10zvector2nofma(__vector double a);
    +__vector double Sleef_finz_lgammad2_u10zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification. +

    + +
    +

    Vectorized single precision log gamma function

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +float Sleef_lgammaf1_u10purec(float a);
    +float Sleef_lgammaf1_u10purecfma(float a);
    +float Sleef_cinz_lgammaf1_u10purec(float a);
    +float Sleef_finz_lgammaf1_u10purecfma(float a);
    +
    +__vector float Sleef_lgammaf4_u10(__vector float a);
    +__vector float Sleef_lgammaf4_u10zvector2(__vector float a);
    +__vector float Sleef_lgammaf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_cinz_lgammaf4_u10zvector2nofma(__vector float a);
    +__vector float Sleef_finz_lgammaf4_u10zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification. +

    + + +

    Nearest integer function

    + +

    Vectorized double precision function for rounding to integer towards zero

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_truncd2(__vector double a);
    +__vector double Sleef_truncd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_trunc with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for rounding to integer towards zero

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_truncf4(__vector float a);
    +__vector float Sleef_truncf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_truncf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for rounding to integer towards negative infinity

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_floord2(__vector double a);
    +__vector double Sleef_floord2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_floor with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for rounding to integer towards negative infinity

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_floorf4(__vector float a);
    +__vector float Sleef_floorf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_floorf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for rounding to integer towards positive infinity

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_ceild2(__vector double a);
    +__vector double Sleef_ceild2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_ceil with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for rounding to integer towards positive infinity

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_ceilf4(__vector float a);
    +__vector float Sleef_ceilf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_ceilf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for rounding to nearest integer, with halfway cases rounded away from zero

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_roundd2(__vector double a);
    +__vector double Sleef_roundd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_round with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for rounding to nearest integer, with halfway cases rounded away from zero

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_roundf4(__vector float a);
    +__vector float Sleef_roundf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_roundf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for rounding to nearest integer, with halfway cases rounded to even

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_rintd2(__vector double a);
    +__vector double Sleef_rintd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_rint with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for rounding to nearest integer, with halfway cases rounded to even

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_rintf4(__vector float a);
    +__vector float Sleef_rintf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_rintf with the same accuracy specification. +

    + + +

    Other function

    + +

    Vectorized double precision function for fused multiply-accumulation

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fmad2(__vector double a, __vector double b, __vector double c);
    +__vector double Sleef_fmad2_zvector2(__vector double a, __vector double b, __vector double c);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fma with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for fused multiply-accumulation

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fmaf4(__vector float a, __vector float b, __vector float c);
    +__vector float Sleef_fmaf4_zvector2(__vector float a, __vector float b, __vector float c);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmaf with the same accuracy specification. +

    + +
    + +

    Vectorized double precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fmodd2(__vector double a, __vector double b);
    +__vector double Sleef_fmodd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmod with the same accuracy specification. +

    + +
    +

    Vectorized single precision FP remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fmodf4(__vector float a, __vector float b);
    +__vector float Sleef_fmodf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmodf with the same accuracy specification. +

    + +
    + +

    Vectorized double precision IEEE remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_remainderd2(__vector double a, __vector double b);
    +__vector double Sleef_remainderd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainder with the same accuracy specification. +

    + +
    +

    Vectorized single precision IEEE remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_remainderf4(__vector float a, __vector float b);
    +__vector float Sleef_remainderf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_remainderf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for multiplying by integral power of 2

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_ldexpd2(__vector double a, __vector int b);
    +__vector double Sleef_ldexpd2_zvector2(__vector double a, __vector int b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_ldexp with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for obtaining fractional component of an FP number

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_frfrexpd2(__vector double a);
    +__vector double Sleef_frfrexpd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_frfrexp with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for obtaining fractional component of an FP number

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_frfrexpf4(__vector float a);
    +__vector float Sleef_frfrexpf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_frfrexpf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for obtaining the integral exponent component of an FP number

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector int Sleef_expfrexpd2(__vector double a);
    +__vector int Sleef_expfrexpd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_expfrexp with the same accuracy specification. +

    + +
    + +

    Vectorized double precision function for getting integer exponent

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector int Sleef_ilogbd2(__vector double a);
    +__vector int Sleef_ilogbd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_ilogb with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for obtaining signed integral and fractional parts of an FP number

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_modfd2(__vector double a);
    +Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_modfd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_modf with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for obtaining signed integral and fractional parts of an FP number

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_modff4(__vector float a);
    +Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_modff4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_modff with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for calculating the absolute value

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fabsd2(__vector double a);
    +__vector double Sleef_fabsd2_zvector2(__vector double a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fabs with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for calculating the absolute value

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fabsf4(__vector float a);
    +__vector float Sleef_fabsf4_zvector2(__vector float a);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fabsf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for copying signs

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_copysignd2(__vector double a, __vector double b);
    +__vector double Sleef_copysignd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_copysign with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for copying signs

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_copysignf4(__vector float a, __vector float b);
    +__vector float Sleef_copysignf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_copysignf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for determining maximum of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fmaxd2(__vector double a, __vector double b);
    +__vector double Sleef_fmaxd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmax with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for determining maximum of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fmaxf4(__vector float a, __vector float b);
    +__vector float Sleef_fmaxf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmaxf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for determining minimum of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fmind2(__vector double a, __vector double b);
    +__vector double Sleef_fmind2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fmin with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for determining minimum of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fminf4(__vector float a, __vector float b);
    +__vector float Sleef_fminf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fminf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function to calculate positive difference of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_fdimd2(__vector double a, __vector double b);
    +__vector double Sleef_fdimd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fdim with the same accuracy specification. +

    + +
    +

    Vectorized single precision function to calculate positive difference of two values

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_fdimf4(__vector float a, __vector float b);
    +__vector float Sleef_fdimf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_fdimf with the same accuracy specification. +

    + +
    +

    Vectorized double precision function for obtaining the next representable FP value

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector double Sleef_nextafterd2(__vector double a, __vector double b);
    +__vector double Sleef_nextafterd2_zvector2(__vector double a, __vector double b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_nextafter with the same accuracy specification. +

    + +
    +

    Vectorized single precision function for obtaining the next representable FP value

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__vector float Sleef_nextafterf4(__vector float a, __vector float b);
    +__vector float Sleef_nextafterf4_zvector2(__vector float a, __vector float b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +This is the vectorized function of Sleef_nextafterf with the same accuracy specification. +

    + + + + + + + diff --git a/doc/html/x86.xhtml b/doc/html/x86.xhtml index ef7f952d..eb06cd6d 100644 --- a/doc/html/x86.xhtml +++ b/doc/html/x86.xhtml @@ -10,10 +10,10 @@ -SLEEF Documentation +SLEEF - Math library reference(x86) -

    SLEEF Documentation - Math library reference

    +

    SLEEF Documentation - Math library reference (x86)

    Table of contents

    @@ -40,6 +40,7 @@
  • Data types and functions for AArch64 architecture
  • Data types and functions for AArch32 architecture
  • Data types and functions for PPC64 architecture
  • +
  • Data types and functions for System/390 architecture
  •  
  • @@ -4978,6 +4979,67 @@ These are the vectorized functions of Sleef_fmodf with the same accuracy specification.

    +
    + +

    Vectorized double precision IEEE remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__m128d Sleef_remainderd2(__m128d a, __m128d b);
    +__m128d Sleef_remainderd2_sse2(__m128d a, __m128d b);
    +__m128d Sleef_remainderd2_sse4(__m128d a, __m128d b);
    +__m128d Sleef_remainderd2_avx2128(__m128d a, __m128d b);
    +
    +__m256d Sleef_remainderd4(__m256d a, __m256d b);
    +__m256d Sleef_remainderd4_avx(__m256d a, __m256d b);
    +__m256d Sleef_remainderd4_fma4(__m256d a, __m256d b);
    +__m256d Sleef_remainderd4_avx2(__m256d a, __m256d b);
    +
    +__m512d Sleef_remainderd8(__m512d a, __m512d b);
    +__m512d Sleef_remainderd8_avx512f(__m512d a, __m512d b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +These are the vectorized functions of Sleef_remainder with the same accuracy specification. +

    + +
    +

    Vectorized single precision IEEE remainder

    + +

    Synopsis

    + +

    +#include <sleef.h>
    +
    +__m128 Sleef_remainderf4(__m128 a, __m128 b);
    +__m128 Sleef_remainderf4_sse2(__m128 a, __m128 b);
    +__m128 Sleef_remainderf4_sse4(__m128 a, __m128 b);
    +__m128 Sleef_remainderf4_avx2128(__m128 a, __m128 b);
    +
    +__m256 Sleef_remainderf8(__m256 a, __m256 b);
    +__m256 Sleef_remainderf8_avx(__m256 a, __m256 b);
    +__m256 Sleef_remainderf8_fma4(__m256 a, __m256 b);
    +__m256 Sleef_remainderf8_avx2(__m256 a, __m256 b);
    +
    +__m512 Sleef_remainderf16(__m512 a, __m512 b);
    +__m512 Sleef_remainderf16_avx512f(__m512 a, __m512 b);
    +
    +Link with -lsleef. +

    + +

    Description

    + +

    +These are the vectorized functions of Sleef_remainderf with the same accuracy specification. +

    +

    Vectorized double precision functions for multiplying by integral power of 2

    diff --git a/src/gencoef/Makefile b/src/gencoef/Makefile index 7ef8bcb3..1867b705 100644 --- a/src/gencoef/Makefile +++ b/src/gencoef/Makefile @@ -1,10 +1,16 @@ .PHONY: all -all : gencoef +all : gencoef mkrempitab mkrempitabqp gencoef : gencoef.c simplexfr.c sp.h dp.h ld.h qp.h gcc -O gencoef.c simplexfr.c -o gencoef -lmpfr -lm +mkrempitab : mkrempitab.c + gcc -O mkrempitab.c -o mkrempitab -lmpfr + +mkrempitabqp : mkrempitabqp.c + gcc -O mkrempitabqp.c -o mkrempitabqp -lmpfr + .PHONY: clean clean : - rm -f gencoef gencoefdp gencoefld a.out *~ + rm -f gencoef gencoefdp gencoefld mkrempitab mkrempitabqp a.out *~ rm -f *.obj *.lib *.dll *.exp *.exe diff --git a/src/gencoef/mkrempitabqp.c b/src/gencoef/mkrempitabqp.c new file mode 100644 index 00000000..51e02c61 --- /dev/null +++ b/src/gencoef/mkrempitabqp.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#define N 8 +#define B 8 +#define NCOL (53-B) +#define NROW ((16385+(53-B)*N-106)/NCOL+1) + +static double *rempitabqp = NULL; + +void generateRempitabqp() { + rempitabqp = calloc(16385-106+(53-B)*(N+1), sizeof(double)); + + int orgprec = mpfr_get_default_prec(); + mpfr_set_default_prec(18000); + + mpfr_t pi, m, n, o; + mpfr_inits(pi, m, n, o, NULL); + mpfr_const_pi(pi, GMP_RNDN); + + mpfr_d_div(n, 0.5, pi, GMP_RNDN); + + for(int e=106;e<16385+(53-B)*N;e++) { + mpfr_set(m, n, GMP_RNDN); + + mpfr_set_ui_2exp(o, 1, -(113 - e), GMP_RNDN); + mpfr_mul(m, m, o, GMP_RNDN); + + mpfr_frac(m, m, GMP_RNDN); + + mpfr_set_ui_2exp(o, 1, (53-B), GMP_RNDN); + mpfr_mul(m, m, o, GMP_RNDN); + + mpfr_trunc(m, m); + + mpfr_set_ui_2exp(o, 1, 7-(53-B), GMP_RNDN); + mpfr_mul(m, m, o, GMP_RNDN); + + int col = (e - 106) % NCOL; + int row = (e - 106) / NCOL; + rempitabqp[col * NROW + row] = mpfr_get_d(m, GMP_RNDN); + } + + mpfr_clears(pi, m, n, o, NULL); + mpfr_set_default_prec(orgprec); +} + + +int main(int argc, char **argv) { + generateRempitabqp(); + + printf("NOEXPORT const double 
Sleef_rempitabqp[] = {\n "); + for(int i=0;i<16385-106+(53-B)*(N+1);i++) { + printf("%.20g, ", rempitabqp[i]); + if ((i & 3) == 3) printf("\n "); + } + printf("\n};\n"); +}