From 08959ebe6ea5d8317330b242e28ba0d2938ac52f Mon Sep 17 00:00:00 2001
From: Agner Fog <AgnerF@users.noreply.github.com>
Date: Sun, 7 Aug 2022 11:18:22 +0200
Subject: [PATCH] Add files via upload

Version 2.02.00
---
 instrset.h       | 12 ++++++++++-
 vectormath_lib.h | 56 +++++++++++++++++++++++++++---------------------
 2 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/instrset.h b/instrset.h
index 4adf832..5f646da 100644
--- a/instrset.h
+++ b/instrset.h
@@ -1,7 +1,7 @@
 /****************************  instrset.h   **********************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2022-07-21
+* Last modified: 2022-07-26
 * Version:       2.02.00
 * Project:       vector class library
 * Description:
@@ -23,6 +23,16 @@
 #ifndef INSTRSET_H
 #define INSTRSET_H 20200
 
+// check if compiled for C++17
+#if defined(_MSVC_LANG)  // MS compiler has its own version of __cplusplus with different value
+#if _MSVC_LANG < 201703
+#error Please compile for C++17 or higher
+#endif
+#else  // all other compilers
+#if __cplusplus < 201703
+#error Please compile for C++17 or higher
+#endif
+#endif
 
 // Allow the use of floating point permute instructions on integer vectors.
 // Some CPU's have an extra latency of 1 or 2 clock cycles for this, but
diff --git a/vectormath_lib.h b/vectormath_lib.h
index 905a7ad..faa2a24 100644
--- a/vectormath_lib.h
+++ b/vectormath_lib.h
@@ -1,19 +1,18 @@
 /****************************  vectormath_lib.h   *****************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2022-07-26
+* Last modified: 2022-08-02
 * Version:       2.02.00
 * Project:       vector class library
 * Description:
 * Header file defining mathematical functions on floating point vectors
-* using Intel SVML library
-*
-* Instructions to use SVML library:
-* Include this file and link with svml
+* using Intel SVML (Short Vector Math Library)
 *
+* Include this file if you want to use SVML for math functions on vectors
+* See vcl_manual.pdf for details on how to obtain the SVML library and link to it.
 * Alternatively, use the inline math functions by including
-* vectormath_exp.h for power and exponential functions
-* vectormath_trig.h for trigonometric functions
+* vectormath_exp.h for power and exponential functions,
+* vectormath_trig.h for trigonometric functions,
 * vectormath_hyp.h for hyperbolic functions
 *
 * For detailed instructions, see vcl_manual.pdf
@@ -36,6 +35,16 @@
 namespace VCL_NAMESPACE {    // optional name space
 #endif
 
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+#define USE_SVML_INTRINSICS  // Intel compilers have intrinsic functions of access to SVML library
+#endif
+
+#if !(defined(USE_SVML_INTRINSICS))
+// sinpi, cospi, and tanpi functions are included in SVML, but undocumented
+// (The "Classic" version of Intel compiler accepts the intrinsics of these functions even though they are not in the header files)
+#define TRIGPI_FUNCTIONS
+#endif
+
 #if defined(__clang__) || defined (__GNUC__)
 #define SINCOS_ASM  // sincos can be fixed with inline assembly
 #else
@@ -43,12 +52,8 @@ namespace VCL_NAMESPACE {    // optional name space
 #endif
 
 
-#if !(defined(__INTEL_COMPILER) && defined(__clang__))
-#define TRIGPI_FUNCTIONS   // sinpi etc. not yet defined intel icpx compiler 2022.1
-#endif
-
 
-#ifdef __INTEL_COMPILER
+#ifdef USE_SVML_INTRINSICS
 
 /*****************************************************************************
 *
@@ -284,7 +289,7 @@ static inline Vec2d cdfnorminv(Vec2d const x) {  // inverse cumulative normal di
 *
 *************************************************************************************/
 
-#if (defined(_WIN64) && !defined(__INTEL_COMPILER) )
+#if (defined(_WIN64) && !defined(USE_SVML_INTRINSICS) )
 // (call with one parameter may work without __vectorcall because the parameter happens to be in zmm0, but that would be unsafe)
 #define V_VECTORCALL  __vectorcall  // fix calling convention, one parameter.
 #define V_VECTORCALL2 __vectorcall  // fix calling convention, two parameters or two returns
@@ -627,7 +632,7 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di
     return  __svml_cdfnorminv2(x);
 }
 
-#endif   // __INTEL_COMPILER
+#endif   // USE_SVML_INTRINSICS
 
 
 
@@ -635,7 +640,7 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di
 
 #if defined (VECTORF256_H)  // 256-bit vector registers supported
 
-#ifdef __INTEL_COMPILER
+#ifdef USE_SVML_INTRINSICS
 /*****************************************************************************
 *
 *      256-bit vector functions using Intel compiler intrinsic functions
@@ -863,7 +868,7 @@ static inline Vec4d cdfnorminv(Vec4d const x) {// inverse cumulative normal dist
     return _mm256_cdfnorminv_pd(x);
 }
 
-#else    // not __INTEL_COMPILER
+#else    // not USE_SVML_INTRINSICS
 /*****************************************************************************
 *
 *      256-bit vector functions using other compiler than Intel
@@ -1170,7 +1175,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) {  // inverse cumulative normal d
     return  __svml_cdfnorminv4(x);
 }
 
-#endif   // __INTEL_COMPILER
+#endif   // USE_SVML_INTRINSICS
 
 #else    // not VECTORF256_H
 
@@ -1415,7 +1420,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di
 
 #if defined (VECTORF512_H)  // 512-bit vector registers supported
 
-#ifdef __INTEL_COMPILER
+#ifdef USE_SVML_INTRINSICS
 /*****************************************************************************
 *
 *      512-bit vector functions using Intel compiler intrinsic functions
@@ -1540,12 +1545,15 @@ static inline Vec8d cospi(Vec8d const x) {       // cosine
 static inline Vec16f tanpi(Vec16f const x) {     // tangent
     return _mm512_tanpi_ps(x);
 }
-/*
+
 static inline Vec8d tanpi(Vec8d const x) {       // tangent
-    // bug in compiler intrinsic? expecting argument __m512, should be __m512d
+#ifdef __INTEL_COMPILER
+    // see https://community.intel.com/t5/Intel-C-Compiler/mm512-tanpi-pd-wrong-declaration/m-p/1404627
+    return _mm512_castps_pd(_mm512_tanpi_pd(_mm512_castpd_ps(x)));
+#else
     return _mm512_tanpi_pd(x);
-} */
-
+#endif
+}
 #endif  // TRIGPI_FUNCTIONS
 
 // inverse trigonometric functions
@@ -1647,7 +1655,7 @@ static inline Vec8d cdfnorminv(Vec8d const x) {  // inverse cumulative normal di
     return _mm512_cdfnorminv_pd(x);
 }
 
-#else    // __INTEL_COMPILER
+#else    // USE_SVML_INTRINSICS
 /*****************************************************************************
 *
 *      512-bit vector functions using other compiler than Intel
@@ -1954,7 +1962,7 @@ static inline Vec8d cdfnorminv (Vec8d const x) {    // inverse cumulative normal
     return  __svml_cdfnorminv8(x);
 }
 
-#endif   // __INTEL_COMPILER
+#endif   // USE_SVML_INTRINSICS
 
 #else    // VECTORF512_H