|
1 | 1 | /**************************** vectormath_lib.h *****************************
|
2 | 2 | * Author: Agner Fog
|
3 | 3 | * Date created: 2012-05-30
|
4 |
| -* Last modified: 2022-07-26 |
| 4 | +* Last modified: 2022-08-02 |
5 | 5 | * Version: 2.02.00
|
6 | 6 | * Project: vector class library
|
7 | 7 | * Description:
|
8 | 8 | * Header file defining mathematical functions on floating point vectors
|
9 |
| -* using Intel SVML library |
10 |
| -* |
11 |
| -* Instructions to use SVML library: |
12 |
| -* Include this file and link with svml |
| 9 | +* using Intel SVML (Short Vector Math Library) |
13 | 10 | *
|
| 11 | +* Include this file if you want to use SVML for math functions on vectors |
| 12 | +* See vcl_manual.pdf for details on how to obtain the SVML library and link to it. |
14 | 13 | * Alternatively, use the inline math functions by including
|
15 |
| -* vectormath_exp.h for power and exponential functions |
16 |
| -* vectormath_trig.h for trigonometric functions |
| 14 | +* vectormath_exp.h for power and exponential functions, |
| 15 | +* vectormath_trig.h for trigonometric functions, |
17 | 16 | * vectormath_hyp.h for hyperbolic functions
|
18 | 17 | *
|
19 | 18 | * For detailed instructions, see vcl_manual.pdf
|
|
36 | 35 | namespace VCL_NAMESPACE { // optional name space
|
37 | 36 | #endif
|
38 | 37 |
|
| 38 | +#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) |
| 39 | +#define USE_SVML_INTRINSICS // Intel compilers have intrinsic functions for access to the SVML library |
| 40 | +#endif |
| 41 | + |
| 42 | +#if !(defined(USE_SVML_INTRINSICS)) |
| 43 | +// sinpi, cospi, and tanpi functions are included in SVML, but undocumented |
| 44 | +// (The "Classic" version of Intel compiler accepts the intrinsics of these functions even though they are not in the header files) |
| 45 | +#define TRIGPI_FUNCTIONS |
| 46 | +#endif |
| 47 | + |
39 | 48 | #if defined(__clang__) || defined (__GNUC__)
|
40 | 49 | #define SINCOS_ASM // sincos can be fixed with inline assembly
|
41 | 50 | #else
|
42 | 51 | // MS compiler does not support inline assembly. sincos not available
|
43 | 52 | #endif
|
44 | 53 |
|
45 | 54 |
|
46 |
| -#if !(defined(__INTEL_COMPILER) && defined(__clang__)) |
47 |
| -#define TRIGPI_FUNCTIONS // sinpi etc. not yet defined intel icpx compiler 2022.1 |
48 |
| -#endif |
49 |
| - |
50 | 55 |
|
51 |
| -#ifdef __INTEL_COMPILER |
| 56 | +#ifdef USE_SVML_INTRINSICS |
52 | 57 |
|
53 | 58 | /*****************************************************************************
|
54 | 59 | *
|
@@ -284,7 +289,7 @@ static inline Vec2d cdfnorminv(Vec2d const x) { // inverse cumulative normal di
|
284 | 289 | *
|
285 | 290 | *************************************************************************************/
|
286 | 291 |
|
287 |
| -#if (defined(_WIN64) && !defined(__INTEL_COMPILER) ) |
| 292 | +#if (defined(_WIN64) && !defined(USE_SVML_INTRINSICS) ) |
288 | 293 | // (call with one parameter may work without __vectorcall because the parameter happens to be in zmm0, but that would be unsafe)
|
289 | 294 | #define V_VECTORCALL __vectorcall // fix calling convention, one parameter.
|
290 | 295 | #define V_VECTORCALL2 __vectorcall // fix calling convention, two parameters or two returns
|
@@ -627,15 +632,15 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di
|
627 | 632 | return __svml_cdfnorminv2(x);
|
628 | 633 | }
|
629 | 634 |
|
630 |
| -#endif // __INTEL_COMPILER |
| 635 | +#endif // USE_SVML_INTRINSICS |
631 | 636 |
|
632 | 637 |
|
633 | 638 |
|
634 | 639 | #if defined (MAX_VECTOR_SIZE) && MAX_VECTOR_SIZE >= 256 // 256 bit vectors
|
635 | 640 |
|
636 | 641 | #if defined (VECTORF256_H) // 256-bit vector registers supported
|
637 | 642 |
|
638 |
| -#ifdef __INTEL_COMPILER |
| 643 | +#ifdef USE_SVML_INTRINSICS |
639 | 644 | /*****************************************************************************
|
640 | 645 | *
|
641 | 646 | * 256-bit vector functions using Intel compiler intrinsic functions
|
@@ -863,7 +868,7 @@ static inline Vec4d cdfnorminv(Vec4d const x) {// inverse cumulative normal dist
|
863 | 868 | return _mm256_cdfnorminv_pd(x);
|
864 | 869 | }
|
865 | 870 |
|
866 |
| -#else // not __INTEL_COMPILER |
| 871 | +#else // not USE_SVML_INTRINSICS |
867 | 872 | /*****************************************************************************
|
868 | 873 | *
|
869 | 874 | * 256-bit vector functions using other compiler than Intel
|
@@ -1170,7 +1175,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal d
|
1170 | 1175 | return __svml_cdfnorminv4(x);
|
1171 | 1176 | }
|
1172 | 1177 |
|
1173 |
| -#endif // __INTEL_COMPILER |
| 1178 | +#endif // USE_SVML_INTRINSICS |
1174 | 1179 |
|
1175 | 1180 | #else // not VECTORF256_H
|
1176 | 1181 |
|
@@ -1415,7 +1420,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di
|
1415 | 1420 |
|
1416 | 1421 | #if defined (VECTORF512_H) // 512-bit vector registers supported
|
1417 | 1422 |
|
1418 |
| -#ifdef __INTEL_COMPILER |
| 1423 | +#ifdef USE_SVML_INTRINSICS |
1419 | 1424 | /*****************************************************************************
|
1420 | 1425 | *
|
1421 | 1426 | * 512-bit vector functions using Intel compiler intrinsic functions
|
@@ -1540,12 +1545,15 @@ static inline Vec8d cospi(Vec8d const x) { // cosine
|
1540 | 1545 | static inline Vec16f tanpi(Vec16f const x) { // tangent
|
1541 | 1546 | return _mm512_tanpi_ps(x);
|
1542 | 1547 | }
|
1543 |
| -/* |
| 1548 | + |
1544 | 1549 | static inline Vec8d tanpi(Vec8d const x) { // tangent
|
1545 |
| - // bug in compiler intrinsic? expecting argument __m512, should be __m512d |
| 1550 | +#ifdef __INTEL_COMPILER |
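| 1551 | + // workaround: this compiler declares _mm512_tanpi_pd with __m512 where __m512d is expected, hence the casts; see https://community.intel.com/t5/Intel-C-Compiler/mm512-tanpi-pd-wrong-declaration/m-p/1404627 |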
| 1552 | + return _mm512_castps_pd(_mm512_tanpi_pd(_mm512_castpd_ps(x))); |
| 1553 | +#else |
1546 | 1554 | return _mm512_tanpi_pd(x);
|
1547 |
| -} */ |
1548 |
| - |
| 1555 | +#endif |
| 1556 | +} |
1549 | 1557 | #endif // TRIGPI_FUNCTIONS
|
1550 | 1558 |
|
1551 | 1559 | // inverse trigonometric functions
|
@@ -1647,7 +1655,7 @@ static inline Vec8d cdfnorminv(Vec8d const x) { // inverse cumulative normal di
|
1647 | 1655 | return _mm512_cdfnorminv_pd(x);
|
1648 | 1656 | }
|
1649 | 1657 |
|
1650 |
| -#else // __INTEL_COMPILER |
| 1658 | +#else // not USE_SVML_INTRINSICS |
1651 | 1659 | /*****************************************************************************
|
1652 | 1660 | *
|
1653 | 1661 | * 512-bit vector functions using other compiler than Intel
|
@@ -1954,7 +1962,7 @@ static inline Vec8d cdfnorminv (Vec8d const x) { // inverse cumulative normal
|
1954 | 1962 | return __svml_cdfnorminv8(x);
|
1955 | 1963 | }
|
1956 | 1964 |
|
1957 |
| -#endif // __INTEL_COMPILER |
| 1965 | +#endif // USE_SVML_INTRINSICS |
1958 | 1966 |
|
1959 | 1967 | #else // not VECTORF512_H
|
1960 | 1968 |
|
|
0 commit comments