diff --git a/vectormath_exp.h b/vectormath_exp.h
index 4eb34b2..70820ef 100644
--- a/vectormath_exp.h
+++ b/vectormath_exp.h
@@ -34,7 +34,7 @@
 ******************************************************************************/
 
 #ifndef VECTORMATH_EXP_H
-#define VECTORMATH_EXP_H  2
+#define VECTORMATH_EXP_H  202
 
 #include "vectormath_common.h"
 
@@ -1547,7 +1547,7 @@ static inline VTYPE pow_template_d(VTYPE const x0, VTYPE const y) {
     // data vectors
     VTYPE x, x1, x2;                             // x variable
     VTYPE px, qx, ef, yr, v;                     // calculation of logarithm
-    VTYPE lg, lg1, lg2;
+    VTYPE lg, lg1;
     VTYPE lgerr, x2err;
     VTYPE e1, e2, ee;
     VTYPE e3, z, z1;                             // calculation of exp and pow
diff --git a/vectormath_hyp.h b/vectormath_hyp.h
index ff265bc..9f22f3f 100644
--- a/vectormath_hyp.h
+++ b/vectormath_hyp.h
@@ -29,7 +29,7 @@
 ******************************************************************************/
 
 #ifndef VECTORMATH_HYP_H
-#define VECTORMATH_HYP_H  2
+#define VECTORMATH_HYP_H  202
 
 #include "vectormath_exp.h"
 
diff --git a/vectormath_lib.h b/vectormath_lib.h
index 05890c2..905a7ad 100644
--- a/vectormath_lib.h
+++ b/vectormath_lib.h
@@ -1,7 +1,7 @@
 /****************************  vectormath_lib.h   *****************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2022-07-20
+* Last modified: 2022-07-26
 * Version:       2.02.00
 * Project:       vector class library
 * Description:
@@ -24,7 +24,7 @@
 
 // check combination of header files
 #ifndef VECTORMATH_LIB_H
-#define VECTORMATH_LIB_H  2
+#define VECTORMATH_LIB_H  202
 
 #ifdef VECTORMATH_COMMON_H
 #error conflicting header files. More than one implementation of mathematical functions included
@@ -36,11 +36,23 @@
 namespace VCL_NAMESPACE {    // optional name space
 #endif
 
+#if defined(__clang__) || defined (__GNUC__)
+#define SINCOS_ASM  // sincos can be fixed with inline assembly
+#else
+    // MS compiler does not support inline assembly. sincos not available
+#endif
+
+
+#if !(defined(__INTEL_COMPILER) && defined(__clang__))
+#define TRIGPI_FUNCTIONS   // sinpi etc. not yet defined intel icpx compiler 2022.1
+#endif
+
 
 #ifdef __INTEL_COMPILER
+
 /*****************************************************************************
 *
-*      128-bit vector functions using Intel compiler
+*      128-bit vector functions using Intel compiler intrinsic functions
 *
 *****************************************************************************/
 
@@ -145,6 +157,27 @@ static inline Vec2d tan(Vec2d const x) {    // tangent
     return _mm_tan_pd(x);
 }
 
+#ifdef TRIGPI_FUNCTIONS
+static inline Vec4f sinpi(Vec4f const x) {    // sine
+    return _mm_sinpi_ps(x);
+}
+static inline Vec2d sinpi(Vec2d const x) {    // sine
+    return _mm_sinpi_pd(x);
+}
+static inline Vec4f cospi(Vec4f const x) {    // cosine
+    return _mm_cospi_ps(x);
+}
+static inline Vec2d cospi(Vec2d const x) {    // cosine
+    return _mm_cospi_pd(x);
+}
+static inline Vec4f tanpi(Vec4f const x) {    // tangent
+    return _mm_tanpi_ps(x);
+}
+static inline Vec2d tanpi(Vec2d const x) {    // tangent
+    return _mm_tanpi_pd(x);
+}
+#endif // TRIGPI_FUNCTIONS
+
 // inverse trigonometric functions
 static inline Vec4f asin(Vec4f const x) {   // inverse sine
     return _mm_asin_ps(x);
@@ -245,25 +278,23 @@ static inline Vec2d cdfnorminv(Vec2d const x) {  // inverse cumulative normal di
 }
 
 #else
-/*****************************************************************************
+/*************************************************************************************
 *
-*      128-bit vector functions using other compiler than Intel
+*      128-bit vector functions using other compiler than Intel C++ compiler "Classic"
 *
-*****************************************************************************/
+*************************************************************************************/
 
-#if (defined(_WIN64) || defined(__CYGWIN__)) && defined(__x86_64__)
-// fix incompatible calling convention in Win64
-#if defined(_MSC_VER) || defined(__clang__)
-#define V_VECTORCALL __vectorcall
+#if (defined(_WIN64) && !defined(__INTEL_COMPILER) )
+// (call with one parameter may work without __vectorcall because the parameter happens to be in zmm0, but that would be unsafe)
+#define V_VECTORCALL  __vectorcall  // fix calling convention, one parameter.
+#define V_VECTORCALL2 __vectorcall  // fix calling convention, two parameters or two returns
 #else
-// gcc. Change this if future gcc version supports __vectorcall
-#define V_VECTORCALL __attribute__((sysv_abi))  // this is inefficient but it works
-#endif
-#else  // not Win64. Vectors are transferred in registers by default
 #define V_VECTORCALL
+#define V_VECTORCALL2
 #endif
 
-// External function prototypes, 128-bit vectors
+
+// External function prototypes for SVML library, 128-bit vectors
 extern "C" {
     extern __m128  V_VECTORCALL __svml_expf4       (__m128);
     extern __m128d V_VECTORCALL __svml_exp2        (__m128d);
@@ -273,8 +304,8 @@ extern "C" {
     extern __m128d V_VECTORCALL __svml_exp22       (__m128d);
     extern __m128  V_VECTORCALL __svml_exp10f4     (__m128);
     extern __m128d V_VECTORCALL __svml_exp102      (__m128d);
-    extern __m128  V_VECTORCALL __svml_powf4       (__m128,  __m128);
-    extern __m128d V_VECTORCALL __svml_pow2        (__m128d, __m128d);
+    extern __m128  V_VECTORCALL2 __svml_powf4      (__m128,  __m128);
+    extern __m128d V_VECTORCALL2 __svml_pow2       (__m128d, __m128d);
     extern __m128  V_VECTORCALL __svml_cbrtf4      (__m128);
     extern __m128d V_VECTORCALL __svml_cbrt2       (__m128d);
     extern __m128  V_VECTORCALL __svml_invsqrtf4   (__m128);
@@ -291,18 +322,26 @@ extern "C" {
     extern __m128d V_VECTORCALL __svml_sin2        (__m128d);
     extern __m128  V_VECTORCALL __svml_cosf4       (__m128);
     extern __m128d V_VECTORCALL __svml_cos2        (__m128d);
-    extern __m128  V_VECTORCALL __svml_sincosf4    (__m128);  // cos returned in xmm1
-    extern __m128d V_VECTORCALL __svml_sincos2     (__m128d); // cos returned in xmm1
+    extern __m128  V_VECTORCALL2 __svml_sincosf4   (__m128);  // cos returned in xmm1
+    extern __m128d V_VECTORCALL2 __svml_sincos2    (__m128d); // cos returned in xmm1
     extern __m128  V_VECTORCALL __svml_tanf4       (__m128);
-    extern __m128d V_VECTORCALL __svml_tan2        (__m128d);
+    extern __m128d V_VECTORCALL __svml_tan2        (__m128d);    
+    extern __m128  V_VECTORCALL __svml_sinpif4     (__m128);
+    extern __m128d V_VECTORCALL __svml_sinpi2      (__m128d);
+    extern __m128  V_VECTORCALL __svml_cospif4     (__m128);
+    extern __m128d V_VECTORCALL __svml_cospi2      (__m128d);
+    //extern __m128  V_VECTORCALL2 __svml_sincospif4 (__m128); // not in library
+    //extern __m128d V_VECTORCALL2 __svml_sincospi2  (__m128d);// not in library
+    extern __m128  V_VECTORCALL __svml_tanpif4     (__m128);
+    extern __m128d V_VECTORCALL __svml_tanpi2      (__m128d);
     extern __m128  V_VECTORCALL __svml_asinf4      (__m128);
     extern __m128d V_VECTORCALL __svml_asin2       (__m128d);
     extern __m128  V_VECTORCALL __svml_acosf4      (__m128);
     extern __m128d V_VECTORCALL __svml_acos2       (__m128d);
     extern __m128  V_VECTORCALL __svml_atanf4      (__m128);
     extern __m128d V_VECTORCALL __svml_atan2       (__m128d);
-    extern __m128  V_VECTORCALL __svml_atan2f4     (__m128,  __m128);
-    extern __m128d V_VECTORCALL __svml_atan22      (__m128d, __m128d);
+    extern __m128  V_VECTORCALL2 __svml_atan2f4    (__m128,  __m128);
+    extern __m128d V_VECTORCALL2 __svml_atan22     (__m128d, __m128d);
     extern __m128  V_VECTORCALL __svml_sinhf4      (__m128);
     extern __m128d V_VECTORCALL __svml_sinh2       (__m128d);
     extern __m128  V_VECTORCALL __svml_coshf4      (__m128);
@@ -344,10 +383,10 @@ static inline Vec2d exp (Vec2d const x) {   // exponential function
     return  __svml_exp2(x);
 }
 
-static inline Vec4f expm1 (Vec4f const x) { // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec4f expm1 (Vec4f const x) { // exp(x)-1
     return  __svml_expm1f4(x);
 }
-static inline Vec2d expm1 (Vec2d const x) { // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec2d expm1 (Vec2d const x) { // exp(x)-1
     return  __svml_expm12(x);
 }
 
@@ -369,13 +408,13 @@ static inline Vec4f pow (Vec4f const a, Vec4f const b) {   // pow(a,b) = a to th
     return  __svml_powf4(a,b);
 }
 
-static inline Vec4f pow (Vec4f const a, float const b) {   // pow(a,b) = a to the power of b
+static inline Vec4f pow (Vec4f const a, float const b) {   // pow(a,b)
     return  __svml_powf4(a,Vec4f(b));
 }
-static inline Vec2d pow (Vec2d const a, Vec2d const b) {   // pow(a,b) = a to the power of b
+static inline Vec2d pow (Vec2d const a, Vec2d const b) {   // pow(a,b)
     return  __svml_pow2(a,b);
 }
-static inline Vec2d pow (Vec2d const a, double const b) {  // pow(a,b) = a to the power of b
+static inline Vec2d pow (Vec2d const a, double const b) {  // pow(a,b)
     return  __svml_pow2(a,Vec2d(b));
 }
 
@@ -430,28 +469,23 @@ static inline Vec2d cos (Vec2d const x) {   // cosine
     return  __svml_cos2(x);
 }
 
-#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
-// no inline assembly in 64 bit MS compiler
-static inline Vec4f sincos (Vec4f * pcos, Vec4f const x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
+// sincos function. sin(x) returned, cos(x) in pcos
+
+#ifdef SINCOS_ASM  // sincos can be fixed with inline assembly
+
+static inline Vec4f sincos (Vec4f * pcos, Vec4f const x) {
     __m128 r_sin, r_cos;
-    r_sin = __svml_sincosf4(x);
-#if defined(__unix__) || defined(__GNUC__)
-    //   __asm__ ( "call V_VECTORCALL __svml_sincosf4 \n movaps %%xmm0, %0 \n movaps %%xmm1, %1" : "=m"(r_sin), "=m"(r_cos) : "xmm0"(x) );
+    //   __asm__ ( "call __svml_sincosf4 \n movaps %%xmm0, %0 \n movaps %%xmm1, %1" : "=m"(r_sin), "=m"(r_cos) : "xmm0"(x) );
+    r_sin = __svml_sincosf4(x); // fix calling convention in windows and linux using assembly
     __asm__ __volatile__ ( "movaps %%xmm1, %0":"=m"(r_cos));
-#else // Windows
-    _asm movaps r_cos, xmm1;
-#endif
     *pcos = r_cos;
     return r_sin;
 }
+
 static inline Vec2d sincos (Vec2d * pcos, Vec2d const x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
     __m128d r_sin, r_cos;
     r_sin = __svml_sincos2(x);
-#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "movaps %%xmm1, %0":"=m"(r_cos));
-#else // Windows
-    _asm movapd r_cos, xmm1;
-#endif
     *pcos = r_cos;
     return r_sin;
 }
@@ -464,6 +498,27 @@ static inline Vec2d tan (Vec2d const x) {   // tangent
     return  __svml_tan2(x);
 }
 
+static inline Vec4f sinpi (Vec4f const x) {   // sine
+    return  __svml_sinpif4(x);
+}
+static inline Vec2d sinpi (Vec2d const x) {   // sine
+    return  __svml_sinpi2(x);
+}
+
+static inline Vec4f cospi (Vec4f const x) {   // cosine
+    return  __svml_cospif4(x);
+}
+static inline Vec2d cospi (Vec2d const x) {   // cosine
+    return  __svml_cospi2(x);
+}
+
+static inline Vec4f tanpi (Vec4f const x) {   // tangent
+    return  __svml_tanpif4(x);
+}
+static inline Vec2d tanpi (Vec2d const x) {   // tangent
+    return  __svml_tanpi2(x);
+}
+
 // inverse trigonometric functions
 static inline Vec4f asin (Vec4f const x) {  // inverse sine
     return  __svml_asinf4(x);
@@ -574,99 +629,102 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di
 
 #endif   // __INTEL_COMPILER
 
+
+
 #if defined (MAX_VECTOR_SIZE) && MAX_VECTOR_SIZE >= 256  // 256 bit vectors
 
-#if defined (VECTORF256_H)   // 256-bit vector registers supported
+#if defined (VECTORF256_H)  // 256-bit vector registers supported
 
 #ifdef __INTEL_COMPILER
 /*****************************************************************************
 *
-*      256-bit vector functions using Intel compiler
+*      256-bit vector functions using Intel compiler intrinsic functions
 *
 *****************************************************************************/
+
 // exponential and power functions
-static inline Vec8f exp(Vec8f const x) {    // exponential function
+static inline Vec8f exp(Vec8f const x) {       // exponential function
     return _mm256_exp_ps(x);
 }
-static inline Vec4d exp(Vec4d const x) {    // exponential function
+static inline Vec4d exp(Vec4d const x) {       // exponential function
     return _mm256_exp_pd(x);
 }
-static inline Vec8f expm1(Vec8f const x) {  // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec8f expm1(Vec8f const x) {     // exp(x)-1. Avoids loss of precision if x is close to 1
     return _mm256_expm1_ps(x);
 }
-static inline Vec4d expm1(Vec4d const x) {  // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec4d expm1(Vec4d const x) {     // exp(x)-1. Avoids loss of precision if x is close to 1
     return _mm256_expm1_pd(x);
 }
-static inline Vec8f exp2(Vec8f const x) {   // pow(2,x)
+static inline Vec8f exp2(Vec8f const x) {      // pow(2,x)
     return _mm256_exp2_ps(x);
 }
-static inline Vec4d exp2(Vec4d const x) {   // pow(2,x)
+static inline Vec4d exp2(Vec4d const x) {      // pow(2,x)
     return _mm256_exp2_pd(x);
 }
-static inline Vec8f exp10(Vec8f const x) {  // pow(10,x)
+static inline Vec8f exp10(Vec8f const x) {     // pow(10,x)
     return _mm256_exp10_ps(x);
 }
-static inline Vec4d exp10(Vec4d const x) {  // pow(10,x)
+static inline Vec4d exp10(Vec4d const x) {     // pow(10,x)
     return _mm256_exp10_pd(x);
 }
-static inline Vec8f pow(Vec8f const a, Vec8f const b) {    // pow(a,b) = a to the power of b
+static inline Vec8f pow(Vec8f const a, Vec8f const b) { // pow(a,b) = a to the power of b
     return _mm256_pow_ps(a, b);
 }
-static inline Vec8f pow(Vec8f const a, float const b) {    // pow(a,b) = a to the power of b
+static inline Vec8f pow(Vec8f const a, float const b) {  // pow(a,b) = a to the power of b
     return _mm256_pow_ps(a, Vec8f(b));
 }
-static inline Vec4d pow(Vec4d const a, Vec4d const b) {    // pow(a,b) = a to the power of b
+static inline Vec4d pow(Vec4d const a, Vec4d const b) {  // pow(a,b) = a to the power of b
     return _mm256_pow_pd(a, b);
 }
-static inline Vec4d pow(Vec4d const a, double const b) {   // pow(a,b) = a to the power of b
+static inline Vec4d pow(Vec4d const a, double const b) { // pow(a,b) = a to the power of b
     return _mm256_pow_pd(a, Vec4d(b));
 }
-static inline Vec8f cbrt(Vec8f const x) {   // pow(x,1/3)
+static inline Vec8f cbrt(Vec8f const x) {      // pow(x,1/3)
     return _mm256_cbrt_ps(x);
 }
-static inline Vec4d cbrt(Vec4d const x) {   // pow(x,1/3)
+static inline Vec4d cbrt(Vec4d const x) {      // pow(x,1/3)
     return _mm256_cbrt_pd(x);
 }
 // logarithms
-static inline Vec8f log(Vec8f const x) {    // natural logarithm
+static inline Vec8f log(Vec8f const x) {       // natural logarithm
     return _mm256_log_ps(x);
 }
-static inline Vec4d log(Vec4d const x) {    // natural logarithm
+static inline Vec4d log(Vec4d const x) {       // natural logarithm
     return _mm256_log_pd(x);
 }
-static inline Vec8f log1p(Vec8f const x) {  // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8f log1p(Vec8f const x) {     // log(1+x). Avoids loss of precision if 1+x is close to 1
     return _mm256_log1p_ps(x);
 }
-static inline Vec4d log1p(Vec4d const x) {  // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec4d log1p(Vec4d const x) {     // log(1+x). Avoids loss of precision if 1+x is close to 1
     return _mm256_log1p_pd(x);
 }
-static inline Vec8f log2(Vec8f const x) {   // logarithm base 2
+static inline Vec8f log2(Vec8f const x) {      // logarithm base 2
     return _mm256_log2_ps(x);
 }
-static inline Vec4d log2(Vec4d const x) {   // logarithm base 2
+static inline Vec4d log2(Vec4d const x) {      // logarithm base 2
     return _mm256_log2_pd(x);
 }
-static inline Vec8f log10(Vec8f const x) {  // logarithm base 10
+static inline Vec8f log10(Vec8f const x) {     // logarithm base 10
     return _mm256_log10_ps(x);
 }
-static inline Vec4d log10(Vec4d const x) {  // logarithm base 10
+static inline Vec4d log10(Vec4d const x) {     // logarithm base 10
     return _mm256_log10_pd(x);
 }
 
 // trigonometric functions
-static inline Vec8f sin(Vec8f const x) {    // sine
+static inline Vec8f sin(Vec8f const x) {       // sine
     return _mm256_sin_ps(x);
 }
-static inline Vec4d sin(Vec4d const x) {    // sine
+static inline Vec4d sin(Vec4d const x) {       // sine
     return _mm256_sin_pd(x);
 }
-static inline Vec8f cos(Vec8f const x) {    // cosine
+static inline Vec8f cos(Vec8f const x) {       // cosine
     return _mm256_cos_ps(x);
 }
-static inline Vec4d cos(Vec4d const x) {    // cosine
+static inline Vec4d cos(Vec4d const x) {       // cosine
     return _mm256_cos_pd(x);
 }
-static inline Vec8f sincos(Vec8f * pcos, Vec8f const x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
+static inline Vec8f sincos(Vec8f * pcos, Vec8f const x) { // sine and cosine. sin(x) returned, cos(x) in pcos
     __m256 r_sin, r_cos;
     r_sin = _mm256_sincos_ps(&r_cos, x);
     *pcos = r_cos;
@@ -678,320 +736,360 @@ static inline Vec4d sincos(Vec4d * pcos, Vec4d const x) {  // sine and cosine. s
     *pcos = r_cos;
     return r_sin;
 }
-static inline Vec8f tan(Vec8f const x) {    // tangent
+static inline Vec8f tan(Vec8f const x) {       // tangent
     return _mm256_tan_ps(x);
 }
-static inline Vec4d tan(Vec4d const x) {    // tangent
+static inline Vec4d tan(Vec4d const x) {       // tangent
     return _mm256_tan_pd(x);
 }
 
+#ifdef TRIGPI_FUNCTIONS
+static inline Vec8f sinpi(Vec8f const x) {     // sine
+    return _mm256_sinpi_ps(x);
+}
+static inline Vec4d sinpi(Vec4d const x) {     // sine
+    return _mm256_sinpi_pd(x);
+}
+static inline Vec8f cospi(Vec8f const x) {     // cosine
+    return _mm256_cospi_ps(x);
+}
+static inline Vec4d cospi(Vec4d const x) {     // cosine
+    return _mm256_cospi_pd(x);
+}
+static inline Vec8f tanpi(Vec8f const x) {     // tangent
+    return _mm256_tanpi_ps(x);
+}
+static inline Vec4d tanpi(Vec4d const x) {     // tangent
+    return _mm256_tanpi_pd(x);
+}
+#endif // TRIGPI_FUNCTIONS
+
 // inverse trigonometric functions
-static inline Vec8f asin(Vec8f const x) {   // inverse sine
+static inline Vec8f asin(Vec8f const x) {      // inverse sine
     return _mm256_asin_ps(x);
 }
-static inline Vec4d asin(Vec4d const x) {   // inverse sine
+static inline Vec4d asin(Vec4d const x) {      // inverse sine
     return _mm256_asin_pd(x);
 }
 
-static inline Vec8f acos(Vec8f const x) {   // inverse cosine
+static inline Vec8f acos(Vec8f const x) {      // inverse cosine
     return _mm256_acos_ps(x);
 }
-static inline Vec4d acos(Vec4d const x) {   // inverse cosine
+static inline Vec4d acos(Vec4d const x) {      // inverse cosine
     return _mm256_acos_pd(x);
 }
 
-static inline Vec8f atan(Vec8f const x) {   // inverse tangent
+static inline Vec8f atan(Vec8f const x) {      // inverse tangent
     return _mm256_atan_ps(x);
 }
-static inline Vec4d atan(Vec4d const x) {   // inverse tangent
+static inline Vec4d atan(Vec4d const x) {      // inverse tangent
     return _mm256_atan_pd(x);
 }
-static inline Vec8f atan2(Vec8f const a, Vec8f const b) {  // inverse tangent of a/b
+static inline Vec8f atan2(Vec8f const a, Vec8f const b) { // inverse tangent of a/b
     return _mm256_atan2_ps(a, b);
 }
-static inline Vec4d atan2(Vec4d const a, Vec4d const b) {  // inverse tangent of a/b
+static inline Vec4d atan2(Vec4d const a, Vec4d const b) { // inverse tangent of a/b
     return _mm256_atan2_pd(a, b);
 }
 
 // hyperbolic functions and inverse hyperbolic functions
-static inline Vec8f sinh(Vec8f const x) {   // hyperbolic sine
+static inline Vec8f sinh(Vec8f const x) {      // hyperbolic sine
     return _mm256_sinh_ps(x);
 }
-static inline Vec4d sinh(Vec4d const x) {   // hyperbolic sine
+static inline Vec4d sinh(Vec4d const x) {      // hyperbolic sine
     return _mm256_sinh_pd(x);
 }
-static inline Vec8f cosh(Vec8f const x) {   // hyperbolic cosine
+static inline Vec8f cosh(Vec8f const x) {      // hyperbolic cosine
     return _mm256_cosh_ps(x);
 }
-static inline Vec4d cosh(Vec4d const x) {   // hyperbolic cosine
+static inline Vec4d cosh(Vec4d const x) {      // hyperbolic cosine
     return _mm256_cosh_pd(x);
 }
-static inline Vec8f tanh(Vec8f const x) {   // hyperbolic tangent
+static inline Vec8f tanh(Vec8f const x) {      // hyperbolic tangent
     return _mm256_tanh_ps(x);
 }
-static inline Vec4d tanh(Vec4d const x) {   // hyperbolic tangent
+static inline Vec4d tanh(Vec4d const x) {      // hyperbolic tangent
     return _mm256_tanh_pd(x);
 }
-static inline Vec8f asinh(Vec8f const x) {  // inverse hyperbolic sine
+static inline Vec8f asinh(Vec8f const x) {     // inverse hyperbolic sine
     return _mm256_asinh_ps(x);
 }
-static inline Vec4d asinh(Vec4d const x) {  // inverse hyperbolic sine
+static inline Vec4d asinh(Vec4d const x) {     // inverse hyperbolic sine
     return _mm256_asinh_pd(x);
 }
-static inline Vec8f acosh(Vec8f const x) {  // inverse hyperbolic cosine
+static inline Vec8f acosh(Vec8f const x) {     // inverse hyperbolic cosine
     return _mm256_acosh_ps(x);
 }
-static inline Vec4d acosh(Vec4d const x) {  // inverse hyperbolic cosine
+static inline Vec4d acosh(Vec4d const x) {     // inverse hyperbolic cosine
     return _mm256_acosh_pd(x);
 }
-static inline Vec8f atanh(Vec8f const x) {  // inverse hyperbolic tangent
+static inline Vec8f atanh(Vec8f const x) {     // inverse hyperbolic tangent
     return _mm256_atanh_ps(x);
 }
-static inline Vec4d atanh(Vec4d const x) {  // inverse hyperbolic tangent
+static inline Vec4d atanh(Vec4d const x) {     // inverse hyperbolic tangent
     return _mm256_atanh_pd(x);
 }
 
 // error function
-static inline Vec8f erf(Vec8f const x) {    // error function
+static inline Vec8f erf(Vec8f const x) {       // error function
     return _mm256_erf_ps(x);
 }
-static inline Vec4d erf(Vec4d const x) {    // error function
+static inline Vec4d erf(Vec4d const x) {       // error function
     return _mm256_erf_pd(x);
 }
-static inline Vec8f erfc(Vec8f const x) {   // error function complement
+static inline Vec8f erfc(Vec8f const x) {      // error function complement
     return _mm256_erfc_ps(x);
 }
-static inline Vec4d erfc(Vec4d const x) {   // error function complement
+static inline Vec4d erfc(Vec4d const x) {      // error function complement
     return _mm256_erfc_pd(x);
 }
-static inline Vec8f erfinv(Vec8f const x) { // inverse error function
+static inline Vec8f erfinv(Vec8f const x) {    // inverse error function
     return _mm256_erfinv_ps(x);
 }
-static inline Vec4d erfinv(Vec4d const x) { // inverse error function
+static inline Vec4d erfinv(Vec4d const x) {    // inverse error function
     return _mm256_erfinv_pd(x);
 }
 
-static inline Vec8f cdfnorm(Vec8f const x) {     // cumulative normal distribution function
+static inline Vec8f cdfnorm(Vec8f const x) {   // cumulative normal distribution function
     return _mm256_cdfnorm_ps(x);
 }
-static inline Vec4d cdfnorm(Vec4d const x) {     // cumulative normal distribution function
+static inline Vec4d cdfnorm(Vec4d const x) {   // cumulative normal distribution function
     return _mm256_cdfnorm_pd(x);
 }
-static inline Vec8f cdfnorminv(Vec8f const x) {  // inverse cumulative normal distribution function
+static inline Vec8f cdfnorminv(Vec8f const x) {// inverse cumulative normal distribution function
     return _mm256_cdfnorminv_ps(x);
 }
-static inline Vec4d cdfnorminv(Vec4d const x) {  // inverse cumulative normal distribution function
+static inline Vec4d cdfnorminv(Vec4d const x) {// inverse cumulative normal distribution function
     return _mm256_cdfnorminv_pd(x);
 }
 
-
-#else    // __INTEL_COMPILER
+#else    // not __INTEL_COMPILER
 /*****************************************************************************
 *
 *      256-bit vector functions using other compiler than Intel
 *
 *****************************************************************************/
-// External function prototypes, 256-bit vectors
+
+// External function prototypes for SVML library, 256-bit vectors
 extern "C" {
-    extern __m256  V_VECTORCALL __svml_expf8       (__m256);
-    extern __m256d V_VECTORCALL __svml_exp4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_expm1f8     (__m256);
-    extern __m256d V_VECTORCALL __svml_expm14      (__m256d);
-    extern __m256  V_VECTORCALL __svml_exp2f8      (__m256);
-    extern __m256d V_VECTORCALL __svml_exp24       (__m256d);
-    extern __m256  V_VECTORCALL __svml_exp10f8     (__m256);
-    extern __m256d V_VECTORCALL __svml_exp104      (__m256d);
-    extern __m256  V_VECTORCALL __svml_powf8       (__m256,  __m256);
-    extern __m256d V_VECTORCALL __svml_pow4        (__m256d, __m256d);
-    extern __m256  V_VECTORCALL __svml_cbrtf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_cbrt4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_invsqrtf8   (__m256);
-    extern __m256d V_VECTORCALL __svml_invsqrt4    (__m256d);
-    extern __m256  V_VECTORCALL __svml_logf8       (__m256);
-    extern __m256d V_VECTORCALL __svml_log4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_log1pf8     (__m256);
-    extern __m256d V_VECTORCALL __svml_log1p4      (__m256d);
-    extern __m256  V_VECTORCALL __svml_log2f8      (__m256);
-    extern __m256d V_VECTORCALL __svml_log24       (__m256d);
-    extern __m256  V_VECTORCALL __svml_log10f8     (__m256);
-    extern __m256d V_VECTORCALL __svml_log104      (__m256d);
-    extern __m256  V_VECTORCALL __svml_sinf8       (__m256);
-    extern __m256d V_VECTORCALL __svml_sin4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_cosf8       (__m256);
-    extern __m256d V_VECTORCALL __svml_cos4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_sincosf8    (__m256);  // cos returned in ymm1
-    extern __m256d V_VECTORCALL __svml_sincos4     (__m256d); // cos returned in ymm1
-    extern __m256  V_VECTORCALL __svml_tanf8       (__m256);
-    extern __m256d V_VECTORCALL __svml_tan4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_asinf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_asin4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_acosf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_acos4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_atanf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_atan4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_atan2f8     (__m256, __m256);
-    extern __m256d V_VECTORCALL __svml_atan24      (__m256d, __m256d);
-    extern __m256  V_VECTORCALL __svml_sinhf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_sinh4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_coshf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_cosh4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_tanhf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_tanh4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_asinhf8     (__m256);
-    extern __m256d V_VECTORCALL __svml_asinh4      (__m256d);
-    extern __m256  V_VECTORCALL __svml_acoshf8     (__m256);
-    extern __m256d V_VECTORCALL __svml_acosh4      (__m256d);
-    extern __m256  V_VECTORCALL __svml_atanhf8     (__m256);
-    extern __m256d V_VECTORCALL __svml_atanh4      (__m256d);
-    extern __m256  V_VECTORCALL __svml_erff8       (__m256);
-    extern __m256d V_VECTORCALL __svml_erf4        (__m256d);
-    extern __m256  V_VECTORCALL __svml_erfcf8      (__m256);
-    extern __m256d V_VECTORCALL __svml_erfc4       (__m256d);
-    extern __m256  V_VECTORCALL __svml_erfinvf8    (__m256);
-    extern __m256d V_VECTORCALL __svml_erfinv4     (__m256d);
+    extern __m256  V_VECTORCALL __svml_expf8        (__m256);
+    extern __m256d V_VECTORCALL __svml_exp4         (__m256d);
+    extern __m256  V_VECTORCALL __svml_expm1f8      (__m256);
+    extern __m256d V_VECTORCALL __svml_expm14       (__m256d);
+    extern __m256  V_VECTORCALL __svml_exp2f8       (__m256);
+    extern __m256d V_VECTORCALL __svml_exp24        (__m256d);
+    extern __m256  V_VECTORCALL __svml_exp10f8      (__m256);
+    extern __m256d V_VECTORCALL __svml_exp104       (__m256d);
+    extern __m256  V_VECTORCALL2 __svml_powf8       (__m256,  __m256);
+    extern __m256d V_VECTORCALL2 __svml_pow4        (__m256d, __m256d);
+    extern __m256  V_VECTORCALL __svml_cbrtf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_cbrt4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_invsqrtf8    (__m256);
+    extern __m256d V_VECTORCALL __svml_invsqrt4     (__m256d);
+    extern __m256  V_VECTORCALL __svml_logf8        (__m256);
+    extern __m256d V_VECTORCALL __svml_log4         (__m256d);
+    extern __m256  V_VECTORCALL __svml_log1pf8      (__m256);
+    extern __m256d V_VECTORCALL __svml_log1p4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_log2f8       (__m256);
+    extern __m256d V_VECTORCALL __svml_log24        (__m256d);
+    extern __m256  V_VECTORCALL __svml_log10f8      (__m256);
+    extern __m256d V_VECTORCALL __svml_log104       (__m256d);
+    extern __m256  V_VECTORCALL __svml_sinf8        (__m256);
+    extern __m256d V_VECTORCALL __svml_sin4         (__m256d);
+    extern __m256  V_VECTORCALL __svml_cosf8        (__m256);
+    extern __m256d V_VECTORCALL __svml_cos4         (__m256d);
+    extern __m256  V_VECTORCALL2 __svml_sincosf8    (__m256);  // cos returned in ymm1
+    extern __m256d V_VECTORCALL2 __svml_sincos4     (__m256d); // cos returned in ymm1
+    extern __m256  V_VECTORCALL __svml_tanf8        (__m256);
+    extern __m256d V_VECTORCALL __svml_tan4         (__m256d);
+    extern __m256  V_VECTORCALL __svml_sinpif8      (__m256);
+    extern __m256d V_VECTORCALL __svml_sinpi4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_cospif8      (__m256);
+    extern __m256d V_VECTORCALL __svml_cospi4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_tanpif8      (__m256);
+    extern __m256d V_VECTORCALL __svml_tanpi4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_asinf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_asin4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_acosf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_acos4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_atanf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_atan4        (__m256d);
+    extern __m256  V_VECTORCALL2 __svml_atan2f8     (__m256, __m256);
+    extern __m256d V_VECTORCALL2 __svml_atan24      (__m256d, __m256d);
+    extern __m256  V_VECTORCALL __svml_sinhf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_sinh4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_coshf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_cosh4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_tanhf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_tanh4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_asinhf8      (__m256);
+    extern __m256d V_VECTORCALL __svml_asinh4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_acoshf8      (__m256);
+    extern __m256d V_VECTORCALL __svml_acosh4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_atanhf8      (__m256);
+    extern __m256d V_VECTORCALL __svml_atanh4       (__m256d);
+    extern __m256  V_VECTORCALL __svml_erff8        (__m256);
+    extern __m256d V_VECTORCALL __svml_erf4         (__m256d);
+    extern __m256  V_VECTORCALL __svml_erfcf8       (__m256);
+    extern __m256d V_VECTORCALL __svml_erfc4        (__m256d);
+    extern __m256  V_VECTORCALL __svml_erfinvf8     (__m256);
+    extern __m256d V_VECTORCALL __svml_erfinv4      (__m256d);
     extern __m256  V_VECTORCALL __svml_cdfnorminvf8(__m256);
-    extern __m256d V_VECTORCALL __svml_cdfnorminv4 (__m256d);
-    extern __m256  V_VECTORCALL __svml_cdfnormf8   (__m256);
-    extern __m256d V_VECTORCALL __svml_cdfnorm4    (__m256d);
-    //extern __m256  V_VECTORCALL __svml_cexpf8      (__m256);
-    //extern __m256d V_VECTORCALL __svml_cexp4       (__m256d);
+    extern __m256d V_VECTORCALL __svml_cdfnorminv4  (__m256d);
+    extern __m256  V_VECTORCALL __svml_cdfnormf8    (__m256);
+    extern __m256d V_VECTORCALL __svml_cdfnorm4     (__m256d);
+    //extern __m256  V_VECTORCALL __svml_cexpf8     (__m256);
+    //extern __m256d V_VECTORCALL __svml_cexp4      (__m256d);
 }
 
 
 // exponential and power functions
-static inline Vec8f exp (Vec8f const x) {   // exponential function
+static inline Vec8f exp (Vec8f const x) {      // exponential function
     return  __svml_expf8(x);
 }
-static inline Vec4d exp (Vec4d const x) {   // exponential function
+static inline Vec4d exp (Vec4d const x) {      // exponential function
     return  __svml_exp4(x);
 }
-static inline Vec8f expm1 (Vec8f const x) { // exp(x)-1
+static inline Vec8f expm1 (Vec8f const x) {    // exp(x)-1
     return  __svml_expm1f8(x);
 }
-static inline Vec4d expm1 (Vec4d const x) { // exp(x)-1
+static inline Vec4d expm1 (Vec4d const x) {    // exp(x)-1
     return  __svml_expm14(x);
 }
-static inline Vec8f exp2 (Vec8f const x) {  // pow(2,x)
+static inline Vec8f exp2 (Vec8f const x) {     // pow(2,x)
     return  __svml_exp2f8(x);
 }
-static inline Vec4d exp2 (Vec4d const x) {  // pow(2,x)
+static inline Vec4d exp2 (Vec4d const x) {     // pow(2,x)
     return  __svml_exp24(x);
 }
-static inline Vec8f exp10 (Vec8f const x) { // pow(10,x)
+static inline Vec8f exp10 (Vec8f const x) {    // pow(10,x)
     return  __svml_exp10f8(x);
 }
-static inline Vec4d exp10 (Vec4d const x) { // pow(10,x)
+static inline Vec4d exp10 (Vec4d const x) {    // pow(10,x)
     return  __svml_exp104(x);
 }
-static inline Vec8f pow (Vec8f const a, Vec8f const b) {   // pow(a,b) = a to the power of b
+static inline Vec8f pow (Vec8f const a, Vec8f const b) {  // pow(a,b) = a to the power of b
     return  __svml_powf8(a,b);
 }
-static inline Vec8f pow (Vec8f const a, float const b) {   // pow(a,b) = a to the power of b
+static inline Vec8f pow (Vec8f const a, float const b) {  // pow(a,b)
     return  __svml_powf8(a,Vec8f(b));
 }
-static inline Vec4d pow (Vec4d const a, Vec4d const b) {   // pow(a,b) = a to the power of b
+static inline Vec4d pow (Vec4d const a, Vec4d const b) {  // pow(a,b)
     return  __svml_pow4(a,b);
 }
-static inline Vec4d pow (Vec4d const a, double const b) {  // pow(a,b) = a to the power of b
+static inline Vec4d pow (Vec4d const a, double const b) { // pow(a,b)
     return  __svml_pow4(a,Vec4d(b));
 }
-static inline Vec8f cbrt (Vec8f const x) {  // pow(x,1/3)
+static inline Vec8f cbrt (Vec8f const x) {     // pow(x,1/3)
     return  __svml_cbrtf8(x);
 }
-static inline Vec4d cbrt (Vec4d const x) {  // pow(x,1/3)
+static inline Vec4d cbrt (Vec4d const x) {     // pow(x,1/3)
     return  __svml_cbrt4(x);
 }
 
 // logarithms
-static inline Vec8f log (Vec8f const x) {   // natural logarithm
+static inline Vec8f log (Vec8f const x) {      // natural logarithm
     return  __svml_logf8(x);
 }
-static inline Vec4d log (Vec4d const x) {   // natural logarithm
+static inline Vec4d log (Vec4d const x) {      // natural logarithm
     return  __svml_log4(x);
 }
-static inline Vec8f log1p (Vec8f const x) { // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8f log1p (Vec8f const x) {    // log(1+x)
     return  __svml_log1pf8(x);
 }
-static inline Vec4d log1p (Vec4d const x) { // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec4d log1p (Vec4d const x) {    // log(1+x)
     return  __svml_log1p4(x);
 }
-static inline Vec8f log2 (Vec8f const x) {  // logarithm base 2
+static inline Vec8f log2 (Vec8f const x) {     // logarithm base 2
     return  __svml_log2f8(x);
 }
-static inline Vec4d log2 (Vec4d const x) {  // logarithm base 2
+static inline Vec4d log2 (Vec4d const x) {     // logarithm base 2
     return  __svml_log24(x);
 }
-static inline Vec8f log10 (Vec8f const x) { // logarithm base 10
+static inline Vec8f log10 (Vec8f const x) {    // logarithm base 10
     return  __svml_log10f8(x);
 }
-static inline Vec4d log10 (Vec4d const x) { // logarithm base 10
+static inline Vec4d log10 (Vec4d const x) {    // logarithm base 10
     return  __svml_log104(x);
 }
 
 // trigonometric functions (angles in radians)
-static inline Vec8f sin (Vec8f const x) {   // sine
+static inline Vec8f sin (Vec8f const x) {      // sine
     return  __svml_sinf8(x);
 }
-static inline Vec4d sin (Vec4d const x) {   // sine
+static inline Vec4d sin (Vec4d const x) {      // sine
     return  __svml_sin4(x);
 }
-static inline Vec8f cos (Vec8f const x) {   // cosine
+static inline Vec8f cos (Vec8f const x) {      // cosine
     return  __svml_cosf8(x);
 }
-static inline Vec4d cos (Vec4d const x) {   // cosine
+static inline Vec4d cos (Vec4d const x) {      // cosine
     return  __svml_cos4(x);
 }
 
-#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
+#ifdef SINCOS_ASM  // sincos can be fixed with inline assembly
 // no inline assembly in 64 bit MS compiler
-static inline Vec8f sincos (Vec8f * pcos, Vec8f const x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
+// sine and cosine. sin(x) returned, cos(x) in pcos
+static inline Vec8f sincos (Vec8f * pcos, Vec8f const x) {
     __m256 r_sin, r_cos;
     r_sin = __svml_sincosf8(x);
-#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "vmovaps %%ymm1, %0":"=m"(r_cos));
-#else // Windows
-    _asm vmovaps r_cos, ymm1;
-#endif
     *pcos = r_cos;
     return r_sin;
 }
-static inline Vec4d sincos (Vec4d * pcos, Vec4d const x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
+// sine and cosine. sin(x) returned, cos(x) in pcos
+static inline Vec4d sincos (Vec4d * pcos, Vec4d const x) {
     __m256d r_sin, r_cos;
     r_sin = __svml_sincos4(x);
-#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "vmovaps %%ymm1, %0":"=m"(r_cos));
-#else // Windows
-    _asm vmovapd r_cos, ymm1;
-#endif
     *pcos = r_cos;
     return r_sin;
 }
-#endif // inline assembly available
+#endif // sincos
 
-static inline Vec8f tan (Vec8f const x) {   // tangent
+static inline Vec8f tan (Vec8f const x) {      // tangent
     return  __svml_tanf8(x);
 }
-static inline Vec4d tan (Vec4d const x) {   // tangent
+static inline Vec4d tan (Vec4d const x) {      // tangent
     return  __svml_tan4(x);
 }
 
+static inline Vec8f sinpi (Vec8f const x) {    // sine
+    return  __svml_sinpif8(x);
+}
+static inline Vec4d sinpi (Vec4d const x) {    // sine
+    return  __svml_sinpi4(x);
+}
+static inline Vec8f cospi (Vec8f const x) {    // cosine
+    return  __svml_cospif8(x);
+}
+static inline Vec4d cospi (Vec4d const x) {    // cosine
+    return  __svml_cospi4(x);
+}
+static inline Vec8f tanpi (Vec8f const x) {    // tangent
+    return  __svml_tanpif8(x);
+}
+static inline Vec4d tanpi (Vec4d const x) {    // tangent
+    return  __svml_tanpi4(x);
+}
+
 // inverse trigonometric functions
-static inline Vec8f asin (Vec8f const x) {  // inverse sine
+static inline Vec8f asin (Vec8f const x) {     // inverse sine
     return  __svml_asinf8(x);
 }
-static inline Vec4d asin (Vec4d const x) {  // inverse sine
+static inline Vec4d asin (Vec4d const x) {     // inverse sine
     return  __svml_asin4(x);
 }
-static inline Vec8f acos (Vec8f const x) {  // inverse cosine
+static inline Vec8f acos (Vec8f const x) {     // inverse cosine
     return  __svml_acosf8(x);
 }
-static inline Vec4d acos (Vec4d const x) {  // inverse cosine
+static inline Vec4d acos (Vec4d const x) {     // inverse cosine
     return  __svml_acos4(x);
 }
-static inline Vec8f atan (Vec8f const x) {  // inverse tangent
+static inline Vec8f atan (Vec8f const x) {     // inverse tangent
     return  __svml_atanf8(x);
 }
-static inline Vec4d atan (Vec4d const x) {  // inverse tangent
+static inline Vec4d atan (Vec4d const x) {     // inverse tangent
     return  __svml_atan4(x);
 }
 static inline Vec8f atan2 (Vec8f const a, Vec8f const b) { // inverse tangent of a/b
@@ -1002,285 +1100,303 @@ static inline Vec4d atan2 (Vec4d const a, Vec4d const b) { // inverse tangent of
 }
 
 // hyperbolic functions and inverse hyperbolic functions
-static inline Vec8f sinh (Vec8f const x) {  // hyperbolic sine
+static inline Vec8f sinh (Vec8f const x) {     // hyperbolic sine
     return  __svml_sinhf8(x);
 }
-static inline Vec4d sinh (Vec4d const x) {  // hyperbolic sine
+static inline Vec4d sinh (Vec4d const x) {     // hyperbolic sine
     return  __svml_sinh4(x);
 }
-static inline Vec8f cosh (Vec8f const x) {  // hyperbolic cosine
+static inline Vec8f cosh (Vec8f const x) {     // hyperbolic cosine
     return  __svml_coshf8(x);
 }
-static inline Vec4d cosh (Vec4d const x) {  // hyperbolic cosine
+static inline Vec4d cosh (Vec4d const x) {     // hyperbolic cosine
     return  __svml_cosh4(x);
 }
-static inline Vec8f tanh (Vec8f const x) {  // hyperbolic tangent
+static inline Vec8f tanh (Vec8f const x) {     // hyperbolic tangent
     return  __svml_tanhf8(x);
 }
-static inline Vec4d tanh (Vec4d const x) {  // hyperbolic tangent
+static inline Vec4d tanh (Vec4d const x) {     // hyperbolic tangent
     return  __svml_tanh4(x);
 }
-static inline Vec8f asinh (Vec8f const x) { // inverse hyperbolic sine
+static inline Vec8f asinh (Vec8f const x) {    // inverse hyperbolic sine
     return  __svml_asinhf8(x);
 }
-static inline Vec4d asinh (Vec4d const x) { // inverse hyperbolic sine
+static inline Vec4d asinh (Vec4d const x) {    // inverse hyperbolic sine
     return  __svml_asinh4(x);
 }
-static inline Vec8f acosh (Vec8f const x) { // inverse hyperbolic cosine
+static inline Vec8f acosh (Vec8f const x) {    // inverse hyperbolic cosine
     return  __svml_acoshf8(x);
 }
-static inline Vec4d acosh (Vec4d const x) { // inverse hyperbolic cosine
+static inline Vec4d acosh (Vec4d const x) {    // inverse hyperbolic cosine
     return  __svml_acosh4(x);
 }
-
-static inline Vec8f atanh (Vec8f const x) { // inverse hyperbolic tangent
+static inline Vec8f atanh (Vec8f const x) {    // inverse hyperbolic tangent
     return  __svml_atanhf8(x);
 }
-static inline Vec4d atanh (Vec4d const x) { // inverse hyperbolic tangent
+static inline Vec4d atanh (Vec4d const x) {    // inverse hyperbolic tangent
     return  __svml_atanh4(x);
 }
 
 // error function
-static inline Vec8f erf (Vec8f const x) {   // error function
+static inline Vec8f erf (Vec8f const x) {      // error function
     return  __svml_erff8(x);
 }
-static inline Vec4d erf (Vec4d const x) {   // error function
+static inline Vec4d erf (Vec4d const x) {      // error function
     return  __svml_erf4(x);
 }
-static inline Vec8f erfc (Vec8f const x) {  // error function complement
+static inline Vec8f erfc (Vec8f const x) {     // error function complement
     return  __svml_erfcf8(x);
 }
-static inline Vec4d erfc (Vec4d const x) {  // error function complement
+static inline Vec4d erfc (Vec4d const x) {     // error function complement
     return  __svml_erfc4(x);
 }
-static inline Vec8f erfinv (Vec8f const x) {     // inverse error function
+static inline Vec8f erfinv (Vec8f const x) {   // inverse error function
     return  __svml_erfinvf8(x);
 }
-static inline Vec4d erfinv (Vec4d const x) {     // inverse error function
+static inline Vec4d erfinv (Vec4d const x) {   // inverse error function
     return  __svml_erfinv4(x);
 }
 
-static inline Vec8f cdfnorm (Vec8f const x) {    // cumulative normal distribution function
+static inline Vec8f cdfnorm (Vec8f const x) {  // cumulative normal distribution function
     return  __svml_cdfnormf8(x);
 }
-static inline Vec4d cdfnorm (Vec4d const x) {    // cumulative normal distribution function
+static inline Vec4d cdfnorm (Vec4d const x) {  // cumulative normal distribution function
     return  __svml_cdfnorm4(x);
 }
-static inline Vec8f cdfnorminv (Vec8f const x) { // inverse cumulative normal distribution function
+static inline Vec8f cdfnorminv (Vec8f const x) {  // inverse cumulative normal distribution function
     return  __svml_cdfnorminvf8(x);
 }
-static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal distribution function
+static inline Vec4d cdfnorminv (Vec4d const x) {  // inverse cumulative normal distribution function
     return  __svml_cdfnorminv4(x);
 }
 
 #endif   // __INTEL_COMPILER
 
-#else    // VECTORF256_H
+#else    // not VECTORF256_H
 
 /*****************************************************************************
 *
 *      256-bit vector functions emulated with 128-bit vectors
 *
 *****************************************************************************/
+
 // exponential and power functions
-static inline Vec8f exp (Vec8f const x) {        // exponential function
+static inline Vec8f exp (Vec8f const x) {      // exponential function
     return Vec8f(exp(x.get_low()), exp(x.get_high()));
 }
-static inline Vec4d exp (Vec4d const x) {        // exponential function
+static inline Vec4d exp (Vec4d const x) {      // exponential function
     return Vec4d(exp(x.get_low()), exp(x.get_high()));
 }
-static inline Vec8f expm1 (Vec8f const x) {      // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec8f expm1 (Vec8f const x) {    // exp(x)-1
     return Vec8f(expm1(x.get_low()), expm1(x.get_high()));
 }
-static inline Vec4d expm1 (Vec4d const x) {      // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec4d expm1 (Vec4d const x) {    // exp(x)-1
     return Vec4d(expm1(x.get_low()), expm1(x.get_high()));
 }
-static inline Vec8f exp2 (Vec8f const x) {       // pow(2,x)
+static inline Vec8f exp2 (Vec8f const x) {     // pow(2,x)
     return Vec8f(exp2(x.get_low()), exp2(x.get_high()));
 }
-static inline Vec4d exp2 (Vec4d const x) {       // pow(2,x)
+static inline Vec4d exp2 (Vec4d const x) {     // pow(2,x)
     return Vec4d(exp2(x.get_low()), exp2(x.get_high()));
 }
-static inline Vec8f exp10 (Vec8f const x) {      // pow(10,x)
+static inline Vec8f exp10 (Vec8f const x) {    // pow(10,x)
     return Vec8f(exp10(x.get_low()), exp10(x.get_high()));
 }
-static inline Vec4d exp10 (Vec4d const x) {      // pow(10,x)
+static inline Vec4d exp10 (Vec4d const x) {    // pow(10,x)
     return Vec4d(exp10(x.get_low()), exp10(x.get_high()));
 }
-static inline Vec8f pow (Vec8f const a, Vec8f const b) {   // pow(a,b) = a to the power of b
+static inline Vec8f pow (Vec8f const a, Vec8f const b) {  // pow(a,b) = a to the power of b
     return Vec8f(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
 }
-static inline Vec8f pow (Vec8f const a, float const b) {   // pow(a,b) = a to the power of b
+static inline Vec8f pow (Vec8f const a, float const b) {  // pow(a,b)
     return Vec8f(pow(a.get_low(),b), pow(a.get_high(),b));
 }
-static inline Vec4d pow (Vec4d const a, Vec4d const b) {   // pow(a,b) = a to the power of b
+static inline Vec4d pow (Vec4d const a, Vec4d const b) {  // pow(a,b)
     return Vec4d(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
 }
-static inline Vec4d pow (Vec4d const a, double const b) {  // pow(a,b) = a to the power of b
+static inline Vec4d pow (Vec4d const a, double const b) { // pow(a,b)
     return Vec4d(pow(a.get_low(),b), pow(a.get_high(),b));
 }
-static inline Vec8f cbrt (Vec8f const x) {   // pow(x,1/3)
+static inline Vec8f cbrt (Vec8f const x) {     // pow(x,1/3)
     return Vec8f(cbrt(x.get_low()), cbrt(x.get_high()));
 }
-static inline Vec4d cbrt (Vec4d const x) {   // pow(x,1/3)
+static inline Vec4d cbrt (Vec4d const x) {     // pow(x,1/3)
     return Vec4d(cbrt(x.get_low()), cbrt(x.get_high()));
 }
 
 // logarithms
-static inline Vec8f log (Vec8f const x) {   // natural logarithm
+static inline Vec8f log (Vec8f const x) {      // natural logarithm
     return Vec8f(log(x.get_low()), log(x.get_high()));
 }
-static inline Vec4d log (Vec4d const x) {   // natural logarithm
+static inline Vec4d log (Vec4d const x) {      // natural logarithm
     return Vec4d(log(x.get_low()), log(x.get_high()));
 }
-static inline Vec8f log1p (Vec8f const x) { // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8f log1p (Vec8f const x) {    // log(1+x). Avoids loss of precision if 1+x is close to 1
     return Vec8f(log1p(x.get_low()), log1p(x.get_high()));
 }
-static inline Vec4d log1p (Vec4d const x) { // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec4d log1p (Vec4d const x) {    // log(1+x). Avoids loss of precision if 1+x is close to 1
     return Vec4d(log1p(x.get_low()), log1p(x.get_high()));
 }
-static inline Vec8f log2 (Vec8f const x) {  // logarithm base 2
+static inline Vec8f log2 (Vec8f const x) {     // logarithm base 2
     return Vec8f(log2(x.get_low()), log2(x.get_high()));
 }
-static inline Vec4d log2 (Vec4d const x) {  // logarithm base 2
+static inline Vec4d log2 (Vec4d const x) {     // logarithm base 2
     return Vec4d(log2(x.get_low()), log2(x.get_high()));
 }
-static inline Vec8f log10 (Vec8f const x) { // logarithm base 10
+static inline Vec8f log10 (Vec8f const x) {    // logarithm base 10
     return Vec8f(log10(x.get_low()), log10(x.get_high()));
 }
-static inline Vec4d log10 (Vec4d const x) { // logarithm base 10
+static inline Vec4d log10 (Vec4d const x) {    // logarithm base 10
     return Vec4d(log10(x.get_low()), log10(x.get_high()));
 }
 
 // trigonometric functions (angles in radians)
-static inline Vec8f sin (Vec8f const x) {   // sine
+static inline Vec8f sin (Vec8f const x) {      // sine
     return Vec8f(sin(x.get_low()), sin(x.get_high()));
 }
-static inline Vec4d sin (Vec4d const x) {   // sine
+static inline Vec4d sin (Vec4d const x) {      // sine
     return Vec4d(sin(x.get_low()), sin(x.get_high()));
 }
-static inline Vec8f cos (Vec8f const x) {   // cosine
+static inline Vec8f cos (Vec8f const x) {      // cosine
     return Vec8f(cos(x.get_low()), cos(x.get_high()));
 }
-static inline Vec4d cos (Vec4d const x) {   // cosine
+static inline Vec4d cos (Vec4d const x) {      // cosine
     return Vec4d(cos(x.get_low()), cos(x.get_high()));
 }
-
-#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
-// no inline assembly in 64 bit MS compiler
+#ifdef SINCOS_ASM  // sincos can be fixed with inline assembly
 static inline Vec8f sincos (Vec8f * pcos, Vec8f const x) { // sine and cosine. sin(x) returned, cos(x) in pcos
-    Vec4f r_sin0, r_sin1, r_cos0, r_cos1;
-    r_sin0 = sincos(&r_cos0, x.get_low());
-    r_sin1 = sincos(&r_cos1, x.get_high());
+    Vec4f r_cos0, r_cos1;
+    Vec8f r_sin = Vec8f(sincos(&r_cos0, x.get_low()), sincos(&r_cos1, x.get_high()));
     *pcos = Vec8f(r_cos0, r_cos1);
-    return Vec8f(r_sin0, r_sin1);
+    return r_sin;
 }
+
 static inline Vec4d sincos (Vec4d * pcos, Vec4d const x) { // sine and cosine. sin(x) returned, cos(x) in pcos
-    Vec2d r_sin0, r_sin1, r_cos0, r_cos1;
-    r_sin0 = sincos(&r_cos0, x.get_low());
-    r_sin1 = sincos(&r_cos1, x.get_high());
+    Vec2d r_cos0, r_cos1;
+    Vec4d r_sin = Vec4d(sincos(&r_cos0, x.get_low()), sincos(&r_cos1, x.get_high()));
     *pcos = Vec4d(r_cos0, r_cos1);
-    return Vec4d(r_sin0, r_sin1);
+    return r_sin;
 }
-#endif // inline assembly available
+#endif  // sincos
 
-static inline Vec8f tan (Vec8f const x) {   // tangent
+static inline Vec8f tan (Vec8f const x) {      // tangent
     return Vec8f(tan(x.get_low()), tan(x.get_high()));
 }
-static inline Vec4d tan (Vec4d const x) {   // tangent
+static inline Vec4d tan (Vec4d const x) {      // tangent
     return Vec4d(tan(x.get_low()), tan(x.get_high()));
 }
 
+#ifdef TRIGPI_FUNCTIONS
+static inline Vec8f sinpi (Vec8f const x) {    // sine
+    return Vec8f(sinpi(x.get_low()), sinpi(x.get_high()));
+}
+static inline Vec4d sinpi (Vec4d const x) {    // sine
+    return Vec4d(sinpi(x.get_low()), sinpi(x.get_high()));
+}
+static inline Vec8f cospi (Vec8f const x) {    // cosine
+    return Vec8f(cospi(x.get_low()), cospi(x.get_high()));
+}
+static inline Vec4d cospi (Vec4d const x) {    // cosine
+    return Vec4d(cospi(x.get_low()), cospi(x.get_high()));
+}
+static inline Vec8f tanpi (Vec8f const x) {    // tangent
+    return Vec8f(tanpi(x.get_low()), tanpi(x.get_high()));
+}
+static inline Vec4d tanpi (Vec4d const x) {    // tangent
+    return Vec4d(tanpi(x.get_low()), tanpi(x.get_high()));
+}
+#endif
+
 // inverse trigonometric functions
-static inline Vec8f asin (Vec8f const x) {  // inverse sine
+static inline Vec8f asin (Vec8f const x) {     // inverse sine
     return Vec8f(asin(x.get_low()), asin(x.get_high()));
 }
-static inline Vec4d asin (Vec4d const x) {  // inverse sine
+static inline Vec4d asin (Vec4d const x) {     // inverse sine
     return Vec4d(asin(x.get_low()), asin(x.get_high()));
 }
-static inline Vec8f acos (Vec8f const x) {  // inverse cosine
+static inline Vec8f acos (Vec8f const x) {     // inverse cosine
     return Vec8f(acos(x.get_low()), acos(x.get_high()));
 }
-static inline Vec4d acos (Vec4d const x) {  // inverse cosine
+static inline Vec4d acos (Vec4d const x) {     // inverse cosine
     return Vec4d(acos(x.get_low()), acos(x.get_high()));
 }
-static inline Vec8f atan (Vec8f const x) {  // inverse tangent
+static inline Vec8f atan (Vec8f const x) {     // inverse tangent
     return Vec8f(atan(x.get_low()), atan(x.get_high()));
 }
-static inline Vec4d atan (Vec4d const x) {  // inverse tangent
+static inline Vec4d atan (Vec4d const x) {     // inverse tangent
     return Vec4d(atan(x.get_low()), atan(x.get_high()));
 }
-static inline Vec8f atan2 (Vec8f const a, Vec8f const b) { // inverse tangent of a/b
+static inline Vec8f atan2 (Vec8f const a, Vec8f const b) {  // inverse tangent of a/b
     return Vec8f(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
 }
-static inline Vec4d atan2 (Vec4d const a, Vec4d const b) { // inverse tangent of a/b
+static inline Vec4d atan2 (Vec4d const a, Vec4d const b) {  // inverse tangent of a/b
     return Vec4d(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
 }
 
 // hyperbolic functions
-static inline Vec8f sinh (Vec8f const x) {  // hyperbolic sine
+static inline Vec8f sinh (Vec8f const x) {     // hyperbolic sine
     return Vec8f(sinh(x.get_low()), sinh(x.get_high()));
 }
-static inline Vec4d sinh (Vec4d const x) {  // hyperbolic sine
+static inline Vec4d sinh (Vec4d const x) {     // hyperbolic sine
     return Vec4d(sinh(x.get_low()), sinh(x.get_high()));
 }
-static inline Vec8f cosh (Vec8f const x) {  // hyperbolic cosine
+static inline Vec8f cosh (Vec8f const x) {     // hyperbolic cosine
     return Vec8f(cosh(x.get_low()), cosh(x.get_high()));
 }
-static inline Vec4d cosh (Vec4d const x) {  // hyperbolic cosine
+static inline Vec4d cosh (Vec4d const x) {     // hyperbolic cosine
     return Vec4d(cosh(x.get_low()), cosh(x.get_high()));
 }
-static inline Vec8f tanh (Vec8f const x) {  // hyperbolic tangent
+static inline Vec8f tanh (Vec8f const x) {     // hyperbolic tangent
     return Vec8f(tanh(x.get_low()), tanh(x.get_high()));
 }
-static inline Vec4d tanh (Vec4d const x) {  // hyperbolic tangent
+static inline Vec4d tanh (Vec4d const x) {     // hyperbolic tangent
     return Vec4d(tanh(x.get_low()), tanh(x.get_high()));
 }
 
 // inverse hyperbolic functions
-static inline Vec8f asinh (Vec8f const x) { // inverse hyperbolic sine
+static inline Vec8f asinh (Vec8f const x) {    // inverse hyperbolic sine
     return Vec8f(asinh(x.get_low()), asinh(x.get_high()));
 }
-static inline Vec4d asinh (Vec4d const x) { // inverse hyperbolic sine
+static inline Vec4d asinh (Vec4d const x) {    // inverse hyperbolic sine
     return Vec4d(asinh(x.get_low()), asinh(x.get_high()));
 }
-static inline Vec8f acosh (Vec8f const x) { // inverse hyperbolic cosine
+static inline Vec8f acosh (Vec8f const x) {    // inverse hyperbolic cosine
     return Vec8f(acosh(x.get_low()), acosh(x.get_high()));
 }
-static inline Vec4d acosh (Vec4d const x) { // inverse hyperbolic cosine
+static inline Vec4d acosh (Vec4d const x) {    // inverse hyperbolic cosine
     return Vec4d(acosh(x.get_low()), acosh(x.get_high()));
 }
-static inline Vec8f atanh (Vec8f const x) { // inverse hyperbolic tangent
+static inline Vec8f atanh (Vec8f const x) {    // inverse hyperbolic tangent
     return Vec8f(atanh(x.get_low()), atanh(x.get_high()));
 }
-static inline Vec4d atanh (Vec4d const x) { // inverse hyperbolic tangent
+static inline Vec4d atanh (Vec4d const x) {    // inverse hyperbolic tangent
     return Vec4d(atanh(x.get_low()), atanh(x.get_high()));
 }
 
 // error function
-static inline Vec8f erf (Vec8f const x) {   // error function
+static inline Vec8f erf (Vec8f const x) {      // error function
     return Vec8f(erf(x.get_low()), erf(x.get_high()));
 }
-static inline Vec4d erf (Vec4d const x) {   // error function
+static inline Vec4d erf (Vec4d const x) {      // error function
     return Vec4d(erf(x.get_low()), erf(x.get_high()));
 }
-static inline Vec8f erfc (Vec8f const x) {  // error function complement
+static inline Vec8f erfc (Vec8f const x) {     // error function complement
     return Vec8f(erfc(x.get_low()), erfc(x.get_high()));
 }
-static inline Vec4d erfc (Vec4d const x) {  // error function complement
+static inline Vec4d erfc (Vec4d const x) {     // error function complement
     return Vec4d(erfc(x.get_low()), erfc(x.get_high()));
 }
-static inline Vec8f erfinv (Vec8f const x) {     // inverse error function
+static inline Vec8f erfinv (Vec8f const x) {   // inverse error function
     return Vec8f(erfinv(x.get_low()), erfinv(x.get_high()));
 }
-static inline Vec4d erfinv (Vec4d const x) {     // inverse error function
+static inline Vec4d erfinv (Vec4d const x) {   // inverse error function
     return Vec4d(erfinv(x.get_low()), erfinv(x.get_high()));
 }
 
-static inline Vec8f cdfnorm (Vec8f const x) {    // cumulative normal distribution function
+static inline Vec8f cdfnorm (Vec8f const x) {  // cumulative normal distribution function
     return Vec8f(cdfnorm(x.get_low()), cdfnorm(x.get_high()));
 }
-static inline Vec4d cdfnorm (Vec4d const x) {    // cumulative normal distribution function
+static inline Vec4d cdfnorm (Vec4d const x) {  // cumulative normal distribution function
     return Vec4d(cdfnorm(x.get_low()), cdfnorm(x.get_high()));
 }
 static inline Vec8f cdfnorminv (Vec8f const x) { // inverse cumulative normal distribution function
@@ -1292,7 +1408,8 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di
 
 #endif   // VECTORF256_H
 
-#endif   // MAX_VECTOR_SIZE >= 256
+#endif  // 256 bits
+
 
 #if defined (MAX_VECTOR_SIZE) && MAX_VECTOR_SIZE >= 512    // 512 bit vectors
 
@@ -1301,7 +1418,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di
 #ifdef __INTEL_COMPILER
 /*****************************************************************************
 *
-*      512-bit vector functions using Intel compiler
+*      512-bit vector functions using Intel compiler intrinsic functions
 *
 *****************************************************************************/
 
@@ -1312,10 +1429,10 @@ static inline Vec16f exp(Vec16f const x) {       // exponential function
 static inline Vec8d exp(Vec8d const x) {         // exponential function
     return _mm512_exp_pd(x);
 }
-static inline Vec16f expm1(Vec16f const x) {     // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec16f expm1(Vec16f const x) {     // exp(x)-1
     return _mm512_expm1_ps(x);
 }
-static inline Vec8d expm1(Vec8d const x) {       // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec8d expm1(Vec8d const x) {       // exp(x)-1
     return _mm512_expm1_pd(x);
 }
 static inline Vec16f exp2(Vec16f const x) {      // pow(2,x)
@@ -1333,13 +1450,13 @@ static inline Vec8d exp10(Vec8d const x) {       // pow(10,x)
 static inline Vec16f pow(Vec16f const a, Vec16f const b) { // pow(a,b) = a to the power of b
     return _mm512_pow_ps(a, b);
 }
-static inline Vec16f pow(Vec16f const a, float const b) {  // pow(a,b) = a to the power of b
+static inline Vec16f pow(Vec16f const a, float const b) {  // pow(a,b)
     return _mm512_pow_ps(a, Vec16f(b));
 }
-static inline Vec8d pow(Vec8d const a, Vec8d const b) {    // pow(a,b) = a to the power of b
+static inline Vec8d pow(Vec8d const a, Vec8d const b) {    // pow(a,b)
     return _mm512_pow_pd(a, b);
 }
-static inline Vec8d pow(Vec8d const a, double const b) {   // pow(a,b) = a to the power of b
+static inline Vec8d pow(Vec8d const a, double const b) {   // pow(a,b)
     return _mm512_pow_pd(a, Vec8d(b));
 }
 static inline Vec16f cbrt(Vec16f const x) {      // pow(x,1/3)
@@ -1355,10 +1472,10 @@ static inline Vec16f log(Vec16f const x) {       // natural logarithm
 static inline Vec8d log(Vec8d const x) {         // natural logarithm
     return _mm512_log_pd(x);
 }
-static inline Vec16f log1p(Vec16f const x) {     // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec16f log1p(Vec16f const x) {     // log(1+x)
     return _mm512_log1p_ps(x);
 }
-static inline Vec8d log1p(Vec8d const x) {       // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8d log1p(Vec8d const x) {       // log(1+x)
     return _mm512_log1p_pd(x);
 }
 static inline Vec16f log2(Vec16f const x) {      // logarithm base 2
@@ -1406,6 +1523,31 @@ static inline Vec8d tan(Vec8d const x) {         // tangent
     return _mm512_tan_pd(x);
 }
 
+#ifdef TRIGPI_FUNCTIONS
+
+static inline Vec16f sinpi(Vec16f const x) {     // sine
+    return _mm512_sinpi_ps(x);
+}
+static inline Vec8d sinpi(Vec8d const x) {       // sine
+    return _mm512_sinpi_pd(x);
+}
+static inline Vec16f cospi(Vec16f const x) {     // cosine
+    return _mm512_cospi_ps(x);
+}
+static inline Vec8d cospi(Vec8d const x) {       // cosine
+    return _mm512_cospi_pd(x);
+}
+static inline Vec16f tanpi(Vec16f const x) {     // tangent
+    return _mm512_tanpi_ps(x);
+}
+/*
+static inline Vec8d tanpi(Vec8d const x) {       // tangent
+    // bug in compiler intrinsic? expecting argument __m512, should be __m512d
+    return _mm512_tanpi_pd(x);
+} */
+
+#endif  // TRIGPI_FUNCTIONS
+
 // inverse trigonometric functions
 static inline Vec16f asin(Vec16f const x) {      // inverse sine
     return _mm512_asin_ps(x);
@@ -1512,70 +1654,76 @@ static inline Vec8d cdfnorminv(Vec8d const x) {  // inverse cumulative normal di
 *
 *****************************************************************************/
 
-// External function prototypes, 512-bit vectors
+// External function prototypes for SVML library, 512-bit vectors
 extern "C" {
     extern __m512  V_VECTORCALL __svml_expf16       (__m512);
-    extern __m512d V_VECTORCALL __svml_exp8        (__m512d);
+    extern __m512d V_VECTORCALL __svml_exp8         (__m512d);
     extern __m512  V_VECTORCALL __svml_expm1f16     (__m512);
-    extern __m512d V_VECTORCALL __svml_expm18      (__m512d);
+    extern __m512d V_VECTORCALL __svml_expm18       (__m512d);
     extern __m512  V_VECTORCALL __svml_exp2f16      (__m512);
-    extern __m512d V_VECTORCALL __svml_exp28       (__m512d);
+    extern __m512d V_VECTORCALL __svml_exp28        (__m512d);
     extern __m512  V_VECTORCALL __svml_exp10f16     (__m512);
-    extern __m512d V_VECTORCALL __svml_exp108      (__m512d);
-    extern __m512  V_VECTORCALL __svml_powf16       (__m512,  __m512);
-    extern __m512d V_VECTORCALL __svml_pow8        (__m512d, __m512d);
+    extern __m512d V_VECTORCALL __svml_exp108       (__m512d);
+    extern __m512  V_VECTORCALL2 __svml_powf16      (__m512,  __m512);
+    extern __m512d V_VECTORCALL2 __svml_pow8        (__m512d, __m512d);
     extern __m512  V_VECTORCALL __svml_cbrtf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_cbrt8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_cbrt8        (__m512d);
     extern __m512  V_VECTORCALL __svml_invsqrtf16   (__m512);
-    extern __m512d V_VECTORCALL __svml_invsqrt8    (__m512d);
+    extern __m512d V_VECTORCALL __svml_invsqrt8     (__m512d);
     extern __m512  V_VECTORCALL __svml_logf16       (__m512);
-    extern __m512d V_VECTORCALL __svml_log8        (__m512d);
+    extern __m512d V_VECTORCALL __svml_log8         (__m512d);
     extern __m512  V_VECTORCALL __svml_log1pf16     (__m512);
-    extern __m512d V_VECTORCALL __svml_log1p8      (__m512d);
+    extern __m512d V_VECTORCALL __svml_log1p8       (__m512d);
     extern __m512  V_VECTORCALL __svml_log2f16      (__m512);
-    extern __m512d V_VECTORCALL __svml_log28       (__m512d);
+    extern __m512d V_VECTORCALL __svml_log28        (__m512d);
     extern __m512  V_VECTORCALL __svml_log10f16     (__m512);
-    extern __m512d V_VECTORCALL __svml_log108      (__m512d);
+    extern __m512d V_VECTORCALL __svml_log108       (__m512d);
     extern __m512  V_VECTORCALL __svml_sinf16       (__m512);
-    extern __m512d V_VECTORCALL __svml_sin8        (__m512d);
+    extern __m512d V_VECTORCALL __svml_sin8         (__m512d);
     extern __m512  V_VECTORCALL __svml_cosf16       (__m512);
-    extern __m512d V_VECTORCALL __svml_cos8        (__m512d);
-    extern __m512  V_VECTORCALL __svml_sincosf16    (__m512); // cos returned in ymm1
-    extern __m512d V_VECTORCALL __svml_sincos8     (__m512d); // cos returned in ymm1
+    extern __m512d V_VECTORCALL __svml_cos8         (__m512d);
+    extern __m512  V_VECTORCALL2 __svml_sincosf16   (__m512);  // cos returned in ymm1
+    extern __m512d V_VECTORCALL2 __svml_sincos8     (__m512d); // cos returned in ymm1
     extern __m512  V_VECTORCALL __svml_tanf16       (__m512);
-    extern __m512d V_VECTORCALL __svml_tan8        (__m512d);
+    extern __m512d V_VECTORCALL __svml_tan8         (__m512d);
+    extern __m512  V_VECTORCALL __svml_sinpif16     (__m512);
+    extern __m512d V_VECTORCALL __svml_sinpi8       (__m512d);
+    extern __m512  V_VECTORCALL __svml_cospif16     (__m512);
+    extern __m512d V_VECTORCALL __svml_cospi8       (__m512d);
+    extern __m512  V_VECTORCALL __svml_tanpif16     (__m512);
+    extern __m512d V_VECTORCALL __svml_tanpi8       (__m512d);
     extern __m512  V_VECTORCALL __svml_asinf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_asin8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_asin8        (__m512d);
     extern __m512  V_VECTORCALL __svml_acosf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_acos8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_acos8        (__m512d);
     extern __m512  V_VECTORCALL __svml_atanf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_atan8       (__m512d);
-    extern __m512  V_VECTORCALL __svml_atan2f16     (__m512, __m512);
-    extern __m512d V_VECTORCALL __svml_atan28      (__m512d, __m512d);
+    extern __m512d V_VECTORCALL __svml_atan8        (__m512d);
+    extern __m512  V_VECTORCALL2 __svml_atan2f16    (__m512, __m512);
+    extern __m512d V_VECTORCALL2 __svml_atan28      (__m512d, __m512d);
     extern __m512  V_VECTORCALL __svml_sinhf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_sinh8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_sinh8        (__m512d);
     extern __m512  V_VECTORCALL __svml_coshf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_cosh8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_cosh8        (__m512d);
     extern __m512  V_VECTORCALL __svml_tanhf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_tanh8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_tanh8        (__m512d);
     extern __m512  V_VECTORCALL __svml_asinhf16     (__m512);
-    extern __m512d V_VECTORCALL __svml_asinh8      (__m512d);
+    extern __m512d V_VECTORCALL __svml_asinh8       (__m512d);
     extern __m512  V_VECTORCALL __svml_acoshf16     (__m512);
-    extern __m512d V_VECTORCALL __svml_acosh8      (__m512d);
+    extern __m512d V_VECTORCALL __svml_acosh8       (__m512d);
     extern __m512  V_VECTORCALL __svml_atanhf16     (__m512);
-    extern __m512d V_VECTORCALL __svml_atanh8      (__m512d);
+    extern __m512d V_VECTORCALL __svml_atanh8       (__m512d);
     extern __m512  V_VECTORCALL __svml_erff16       (__m512);
-    extern __m512d V_VECTORCALL __svml_erf8        (__m512d);
+    extern __m512d V_VECTORCALL __svml_erf8         (__m512d);
     extern __m512  V_VECTORCALL __svml_erfcf16      (__m512);
-    extern __m512d V_VECTORCALL __svml_erfc8       (__m512d);
+    extern __m512d V_VECTORCALL __svml_erfc8        (__m512d);
     extern __m512  V_VECTORCALL __svml_erfinvf16    (__m512);
-    extern __m512d V_VECTORCALL __svml_erfinv8     (__m512d);
+    extern __m512d V_VECTORCALL __svml_erfinv8      (__m512d);
     extern __m512  V_VECTORCALL __svml_cdfnorminvf16(__m512);
-    extern __m512d V_VECTORCALL __svml_cdfnorminv8 (__m512d);
+    extern __m512d V_VECTORCALL __svml_cdfnorminv8  (__m512d);
     extern __m512  V_VECTORCALL __svml_cdfnormf16   (__m512);
-    extern __m512d V_VECTORCALL __svml_cdfnorm8    (__m512d);
+    extern __m512d V_VECTORCALL __svml_cdfnorm8     (__m512d);
     //extern __m512  V_VECTORCALL __svml_cexpf16    (__m512);
-    //extern __m512d V_VECTORCALL __svml_cexp8     (__m512d);
+    //extern __m512d V_VECTORCALL __svml_cexp8      (__m512d);
 }
 
 
@@ -1607,13 +1755,13 @@ static inline Vec8d exp10 (Vec8d const x) {      // pow(10,x)
 static inline Vec16f pow (Vec16f const a, Vec16f const b) {  // pow(a,b) = a to the power of b
     return  __svml_powf16(a,b);
 }
-static inline Vec16f pow (Vec16f const a, float const b) {   // pow(a,b) = a to the power of b
+static inline Vec16f pow (Vec16f const a, float const b) {   // pow(a,b)
     return  __svml_powf16(a,Vec16f(b));
 }
-static inline Vec8d pow (Vec8d const a, Vec8d const b) {     // pow(a,b) = a to the power of b
+static inline Vec8d pow (Vec8d const a, Vec8d const b) {     // pow(a,b)
     return  __svml_pow8(a,b);
 }
-static inline Vec8d pow (Vec8d const a, double const b) {    // pow(a,b) = a to the power of b
+static inline Vec8d pow (Vec8d const a, double const b) {    // pow(a,b)
     return  __svml_pow8(a,Vec8d(b));
 }
 static inline Vec16f cbrt (Vec16f const x) {     // pow(x,1/3)
@@ -1630,10 +1778,10 @@ static inline Vec16f log (Vec16f const x) {      // natural logarithm
 static inline Vec8d log (Vec8d const x) {        // natural logarithm
     return  __svml_log8(x);
 }
-static inline Vec16f log1p (Vec16f const x) {    // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec16f log1p (Vec16f const x) {    // log(1+x)
     return  __svml_log1pf16(x);
 }
-static inline Vec8d log1p (Vec8d const x) {      // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8d log1p (Vec8d const x) {      // log(1+x)
     return  __svml_log1p8(x);
 }
 static inline Vec16f log2 (Vec16f const x) {     // logarithm base 2
@@ -1663,17 +1811,13 @@ static inline Vec8d cos (Vec8d const x) {        // cosine
     return  __svml_cos8(x);
 }
 
-#if defined(__unix__) || defined(__INTEL_COMPILER) //|| !defined(__x86_64__) || !defined(_MSC_VER)
+#ifdef SINCOS_ASM  // sincos can be fixed with inline assembly
 // no inline assembly in 64 bit MS compiler
 // sine and cosine. sin(x) returned, cos(x) in pcos
 static inline Vec16f sincos (Vec16f * pcos, Vec16f const x) {
     __m512 r_sin, r_cos;
     r_sin = __svml_sincosf16(x);
-#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "vmovaps %%zmm1, %0":"=m"(r_cos));
-#else // Windows
-    // _asm vmovaps r_cos, zmm1; // does not work in VS 2019
-#endif
     *pcos = r_cos;
     return r_sin;
 }
@@ -1681,15 +1825,11 @@ static inline Vec16f sincos (Vec16f * pcos, Vec16f const x) {
 static inline Vec8d sincos (Vec8d * pcos, Vec8d const x) {
     __m512d r_sin, r_cos;
     r_sin = __svml_sincos8(x);
-#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "vmovaps %%zmm1, %0":"=m"(r_cos));
-#else // Windows
-    // _asm vmovapd r_cos, zmm1;  // does not work in VS 2019
-#endif
     *pcos = r_cos;
     return r_sin;
 }
-#endif // inline assembly available
+#endif // sincos
 
 static inline Vec16f tan (Vec16f const x) {      // tangent
     return  __svml_tanf16(x);
@@ -1698,6 +1838,25 @@ static inline Vec8d tan (Vec8d const x) {        // tangent
     return  __svml_tan8(x);
 }
 
+static inline Vec16f sinpi (Vec16f const x) {    // sine
+    return  __svml_sinpif16(x);
+}
+static inline Vec8d sinpi (Vec8d const x) {      // sine
+    return  __svml_sinpi8(x);
+}
+static inline Vec16f cospi (Vec16f const x) {    // cosine
+    return  __svml_cospif16(x);
+}
+static inline Vec8d cospi (Vec8d const x) {      // cosine
+    return  __svml_cospi8(x);
+}
+static inline Vec16f tanpi (Vec16f const x) {    // tangent
+    return  __svml_tanpif16(x);
+}
+static inline Vec8d tanpi (Vec8d const x) {      // tangent
+    return  __svml_tanpi8(x);
+}
+
 // inverse trigonometric functions
 static inline Vec16f asin (Vec16f const x) {     // inverse sine
     return  __svml_asinf16(x);
@@ -1798,6 +1957,7 @@ static inline Vec8d cdfnorminv (Vec8d const x) {    // inverse cumulative normal
 #endif   // __INTEL_COMPILER
 
 #else    // VECTORF512_H
+
 /*****************************************************************************
 *
 *      512-bit vector functions emulated with 256-bit vectors
@@ -1811,10 +1971,10 @@ static inline Vec16f exp (Vec16f const x) {      // exponential function
 static inline Vec8d exp (Vec8d const x) {        // exponential function
     return Vec8d(exp(x.get_low()), exp(x.get_high()));
 }
-static inline Vec16f expm1 (Vec16f const x) {    // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec16f expm1 (Vec16f const x) {    // exp(x)-1
     return Vec16f(expm1(x.get_low()), expm1(x.get_high()));
 }
-static inline Vec8d expm1 (Vec8d const x) {      // exp(x)-1. Avoids loss of precision if x is close to 1
+static inline Vec8d expm1 (Vec8d const x) {      // exp(x)-1
     return Vec8d(expm1(x.get_low()), expm1(x.get_high()));
 }
 static inline Vec16f exp2 (Vec16f const x) {     // pow(2,x)
@@ -1832,13 +1992,13 @@ static inline Vec8d exp10 (Vec8d const x) {      // pow(10,x)
 static inline Vec16f pow (Vec16f const a, Vec16f const b) {  // pow(a,b) = a to the power of b
     return Vec16f(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
 }
-static inline Vec16f pow (Vec16f const a, float const b) {   // pow(a,b) = a to the power of b
+static inline Vec16f pow (Vec16f const a, float const b) {   // pow(a,b)
     return Vec16f(pow(a.get_low(),b), pow(a.get_high(),b));
 }
-static inline Vec8d pow (Vec8d const a, Vec8d const b) {     // pow(a,b) = a to the power of b
+static inline Vec8d pow (Vec8d const a, Vec8d const b) {     // pow(a,b)
     return Vec8d(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
 }
-static inline Vec8d pow (Vec8d const a, double const b) {    // pow(a,b) = a to the power of b
+static inline Vec8d pow (Vec8d const a, double const b) {    // pow(a,b)
     return Vec8d(pow(a.get_low(),b), pow(a.get_high(),b));
 }
 static inline Vec16f cbrt (Vec16f const x) {     // pow(x,1/3)
@@ -1855,10 +2015,10 @@ static inline Vec16f log (Vec16f const x) {      // natural logarithm
 static inline Vec8d log (Vec8d const x) {        // natural logarithm
     return Vec8d(log(x.get_low()), log(x.get_high()));
 }
-static inline Vec16f log1p (Vec16f const x) {    // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec16f log1p (Vec16f const x) {    // log(1+x)
     return Vec16f(log1p(x.get_low()), log1p(x.get_high()));
 }
-static inline Vec8d log1p (Vec8d const x) {      // log(1+x). Avoids loss of precision if 1+x is close to 1
+static inline Vec8d log1p (Vec8d const x) {      // log(1+x)
     return Vec8d(log1p(x.get_low()), log1p(x.get_high()));
 }
 static inline Vec16f log2 (Vec16f const x) {     // logarithm base 2
@@ -1887,25 +2047,21 @@ static inline Vec16f cos (Vec16f const x) {      // cosine
 static inline Vec8d cos (Vec8d const x) {        // cosine
     return Vec8d(cos(x.get_low()), cos(x.get_high()));
 }
-
-#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
-// no inline assembly in 64 bit MS compiler
+#ifdef SINCOS_ASM  // sincos can be fixed with inline assembly
 static inline Vec16f sincos (Vec16f * pcos, Vec16f const x) {  // sine and cosine. sin(x) returned, cos(x) in pcos
-    Vec8f r_sin0, r_sin1, r_cos0, r_cos1;
-    r_sin0 = sincos(&r_cos0, x.get_low());
-    r_sin1 = sincos(&r_cos1, x.get_high());
+    Vec8f r_cos0, r_cos1;
+    Vec16f r_sin = Vec16f(sincos(&r_cos0, x.get_low()), sincos(&r_cos1, x.get_high()));
     *pcos = Vec16f(r_cos0, r_cos1);
-    return Vec16f(r_sin0, r_sin1);
+    return r_sin;
 }
+
 static inline Vec8d sincos (Vec8d * pcos, Vec8d const x) {     // sine and cosine. sin(x) returned, cos(x) in pcos
-    Vec4d r_sin0, r_sin1, r_cos0, r_cos1;
-    r_sin0 = sincos(&r_cos0, x.get_low());
-    r_sin1 = sincos(&r_cos1, x.get_high());
+    Vec4d r_cos0, r_cos1;
+    Vec8d r_sin = Vec8d(sincos(&r_cos0, x.get_low()), sincos(&r_cos1, x.get_high()));
     *pcos = Vec8d(r_cos0, r_cos1);
-    return Vec8d(r_sin0, r_sin1);
+    return r_sin;
 }
-#endif // inline assembly available
-
+#endif  // sincos
 
 static inline Vec16f tan (Vec16f const x) {      // tangent
     return Vec16f(tan(x.get_low()), tan(x.get_high()));
@@ -1914,6 +2070,27 @@ static inline Vec8d tan (Vec8d const x) {        // tangent
     return Vec8d(tan(x.get_low()), tan(x.get_high()));
 }
 
+#ifdef TRIGPI_FUNCTIONS
+static inline Vec16f sinpi (Vec16f const x) {    // sine
+    return Vec16f(sinpi(x.get_low()), sinpi(x.get_high()));
+}
+static inline Vec8d sinpi (Vec8d const x) {      // sine
+    return Vec8d(sinpi(x.get_low()), sinpi(x.get_high()));
+}
+static inline Vec16f cospi (Vec16f const x) {    // cosine
+    return Vec16f(cospi(x.get_low()), cospi(x.get_high()));
+}
+static inline Vec8d cospi (Vec8d const x) {      // cosine
+    return Vec8d(cospi(x.get_low()), cospi(x.get_high()));
+}
+static inline Vec16f tanpi (Vec16f const x) {    // tangent
+    return Vec16f(tanpi(x.get_low()), tanpi(x.get_high()));
+}
+static inline Vec8d tanpi (Vec8d const x) {      // tangent
+    return Vec8d(tanpi(x.get_low()), tanpi(x.get_high()));
+}
+#endif
+
 // inverse trigonometric functions
 static inline Vec16f asin (Vec16f const x) {     // inverse sine
     return Vec16f(asin(x.get_low()), asin(x.get_high()));
@@ -2006,10 +2183,10 @@ static inline Vec16f cdfnorm (Vec16f const x) {  // cumulative normal distributi
 static inline Vec8d cdfnorm (Vec8d const x) {    // cumulative normal distribution function
     return Vec8d(cdfnorm(x.get_low()), cdfnorm(x.get_high()));
 }
-static inline Vec16f cdfnorminv (Vec16f const x) { // inverse cumulative normal distribution function
+static inline Vec16f cdfnorminv (Vec16f const x) {// inverse cumulative normal distribution function
     return Vec16f(cdfnorminv(x.get_low()), cdfnorminv(x.get_high()));
 }
-static inline Vec8d cdfnorminv (Vec8d const x) {   // inverse cumulative normal distribution function
+static inline Vec8d cdfnorminv (Vec8d const x) {  // inverse cumulative normal distribution function
     return Vec8d(cdfnorminv(x.get_low()), cdfnorminv(x.get_high()));
 }
 
diff --git a/vectormath_trig.h b/vectormath_trig.h
index 1acd606..c33e354 100644
--- a/vectormath_trig.h
+++ b/vectormath_trig.h
@@ -1,7 +1,7 @@
 /****************************  vectormath_trig.h   ******************************
 * Author:        Agner Fog
 * Date created:  2014-04-18
-* Last modified: 2022-07-20
+* Last modified: 2022-07-26
 * Version:       2.02.00
 * Project:       vector class library
 * Description:
@@ -25,7 +25,7 @@
 ******************************************************************************/
 
 #ifndef VECTORMATH_TRIG_H
-#define VECTORMATH_TRIG_H  2
+#define VECTORMATH_TRIG_H  202
 
 #include "vectormath_common.h"
 
@@ -66,13 +66,13 @@ static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const xx) {
     const double DP3 = 3.06161699786838294307E-17 * 2.;
 
     typedef decltype(roundi(xx)) ITYPE;          // integer vector type
-    typedef decltype(nan_code(xx)) UITYPE;       // unsigned integer vector type
+    //typedef decltype(nan_code(xx)) UITYPE;       // unsigned integer vector type
     typedef decltype(xx < xx) BVTYPE;            // boolean vector type
 
     VTYPE  xa, x, y, x2, s, c, sin1, cos1;       // data vectors
-    ITYPE  q, qq, signsin, signcos;              // integer vectors, 64 bit
+    ITYPE  q, signsin, signcos;                  // integer vectors, 64 bit
 
-    BVTYPE swap, overflow;                       // boolean vectors
+    BVTYPE swap;                                 // boolean vector
 
 #if INSTRSET < 8  // no FMA
     const double input_limit = 1.E13;            // lower overflow limit without FMA
@@ -243,7 +243,7 @@ static inline VTYPE sincos_f(VTYPE* cosret, VTYPE const xx) {
     const float P2cosf = 2.443315711809948E-5f;
 
     typedef decltype(roundi(xx)) ITYPE;          // integer vector type
-    typedef decltype(nan_code(xx)) UITYPE;       // unsigned integer vector type
+    //typedef decltype(nan_code(xx)) UITYPE;       // unsigned integer vector type
     typedef decltype(xx < xx) BVTYPE;            // boolean vector type
     
 #if INSTRSET < 8  // no FMA
@@ -254,7 +254,7 @@ static inline VTYPE sincos_f(VTYPE* cosret, VTYPE const xx) {
 
     VTYPE  xa, x, y, x2, s, c, sin1, cos1;       // data vectors
     ITYPE  q, signsin, signcos;                  // integer vectors
-    BVTYPE swap, overflow;                       // boolean vectors
+    BVTYPE swap;                                 // boolean vector
 
     xa = abs(xx);
 
@@ -452,7 +452,7 @@ static inline VTYPE tan_d(VTYPE const x) {
 
     typedef decltype(x > x) BVTYPE;         // boolean vector type
     VTYPE  xa, y, z, zz, px, qx, tn, recip; // data vectors
-    BVTYPE doinvert, xzero, overflow;       // boolean vectors
+    BVTYPE doinvert;                        // boolean vector
     typedef decltype(nan_code(x)) UITYPE;   // unsigned integer vector type
 
     xa = abs(x);
@@ -584,7 +584,7 @@ static inline VTYPE asin_d(VTYPE const x) {
     const double Q1asin = 1.395105614657485689735E2;
     const double Q0asin = -4.918853881490881290097E1;
 
-    VTYPE  xa, xb, x1, x2, x3, x4, x5, px, qx, rx, sx, vx, wx, y1, yb, z, z1, z2;
+    VTYPE  xa, xb, x1, x2, x3, x4, x5, px, qx, rx, sx, vx, wx, y1, z, z1, z2;
     bool   dobig, dosmall;
 
     xa = abs(x);