Skip to content

Commit 08959eb

Browse files
authored
Add files via upload
Version 2.02.00
1 parent d001236 commit 08959eb

File tree

2 files changed

+43
-25
lines changed

2 files changed

+43
-25
lines changed

‎instrset.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**************************** instrset.h **********************************
22
* Author: Agner Fog
33
* Date created: 2012-05-30
4-
* Last modified: 2022-07-21
4+
* Last modified: 2022-07-26
55
* Version: 2.02.00
66
* Project: vector class library
77
* Description:
@@ -23,6 +23,16 @@
2323
#ifndef INSTRSET_H
2424
#define INSTRSET_H 20200
2525

26+
// check if compiled for C++17
27+
#if defined(_MSVC_LANG) // MS compiler has its own version of __cplusplus with different value
28+
#if _MSVC_LANG < 201703
29+
#error Please compile for C++17 or higher
30+
#endif
31+
#else // all other compilers
32+
#if __cplusplus < 201703
33+
#error Please compile for C++17 or higher
34+
#endif
35+
#endif
2636

2737
// Allow the use of floating point permute instructions on integer vectors.
2838
// Some CPUs have an extra latency of 1 or 2 clock cycles for this, but

‎vectormath_lib.h

+32-24
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
/**************************** vectormath_lib.h *****************************
22
* Author: Agner Fog
33
* Date created: 2012-05-30
4-
* Last modified: 2022-07-26
4+
* Last modified: 2022-08-02
55
* Version: 2.02.00
66
* Project: vector class library
77
* Description:
88
* Header file defining mathematical functions on floating point vectors
9-
* using Intel SVML library
10-
*
11-
* Instructions to use SVML library:
12-
* Include this file and link with svml
9+
* using Intel SVML (Short Vector Math Library)
1310
*
11+
* Include this file if you want to use SVML for math functions on vectors
12+
* See vcl_manual.pdf for details on how to obtain the SVML library and link to it.
1413
* Alternatively, use the inline math functions by including
15-
* vectormath_exp.h for power and exponential functions
16-
* vectormath_trig.h for trigonometric functions
14+
* vectormath_exp.h for power and exponential functions,
15+
* vectormath_trig.h for trigonometric functions,
1716
* vectormath_hyp.h for hyperbolic functions
1817
*
1918
* For detailed instructions, see vcl_manual.pdf
@@ -36,19 +35,25 @@
3635
namespace VCL_NAMESPACE { // optional name space
3736
#endif
3837

38+
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
39+
#define USE_SVML_INTRINSICS // Intel compilers have intrinsic functions for access to the SVML library
40+
#endif
41+
42+
#if !(defined(USE_SVML_INTRINSICS))
43+
// sinpi, cospi, and tanpi functions are included in SVML, but undocumented
44+
// (The "Classic" version of the Intel compiler accepts the intrinsics for these functions even though they are not declared in the header files)
45+
#define TRIGPI_FUNCTIONS
46+
#endif
47+
3948
#if defined(__clang__) || defined (__GNUC__)
4049
#define SINCOS_ASM // sincos can be fixed with inline assembly
4150
#else
4251
// MS compiler does not support inline assembly. sincos not available
4352
#endif
4453

4554

46-
#if !(defined(__INTEL_COMPILER) && defined(__clang__))
47-
#define TRIGPI_FUNCTIONS // sinpi etc. not yet defined intel icpx compiler 2022.1
48-
#endif
49-
5055

51-
#ifdef __INTEL_COMPILER
56+
#ifdef USE_SVML_INTRINSICS
5257

5358
/*****************************************************************************
5459
*
@@ -284,7 +289,7 @@ static inline Vec2d cdfnorminv(Vec2d const x) { // inverse cumulative normal di
284289
*
285290
*************************************************************************************/
286291

287-
#if (defined(_WIN64) && !defined(__INTEL_COMPILER) )
292+
#if (defined(_WIN64) && !defined(USE_SVML_INTRINSICS) )
288293
// (A call with one parameter may work without __vectorcall because the parameter happens to be in zmm0, but relying on that would be unsafe)
289294
#define V_VECTORCALL __vectorcall // fix calling convention, one parameter.
290295
#define V_VECTORCALL2 __vectorcall // fix calling convention, two parameters or two returns
@@ -627,15 +632,15 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di
627632
return __svml_cdfnorminv2(x);
628633
}
629634

630-
#endif // __INTEL_COMPILER
635+
#endif // USE_SVML_INTRINSICS
631636

632637

633638

634639
#if defined (MAX_VECTOR_SIZE) && MAX_VECTOR_SIZE >= 256 // 256 bit vectors
635640

636641
#if defined (VECTORF256_H) // 256-bit vector registers supported
637642

638-
#ifdef __INTEL_COMPILER
643+
#ifdef USE_SVML_INTRINSICS
639644
/*****************************************************************************
640645
*
641646
* 256-bit vector functions using Intel compiler intrinsic functions
@@ -863,7 +868,7 @@ static inline Vec4d cdfnorminv(Vec4d const x) {// inverse cumulative normal dist
863868
return _mm256_cdfnorminv_pd(x);
864869
}
865870

866-
#else // not __INTEL_COMPILER
871+
#else // not USE_SVML_INTRINSICS
867872
/*****************************************************************************
868873
*
869874
* 256-bit vector functions using a compiler other than Intel
@@ -1170,7 +1175,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal d
11701175
return __svml_cdfnorminv4(x);
11711176
}
11721177

1173-
#endif // __INTEL_COMPILER
1178+
#endif // USE_SVML_INTRINSICS
11741179

11751180
#else // not VECTORF256_H
11761181

@@ -1415,7 +1420,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di
14151420

14161421
#if defined (VECTORF512_H) // 512-bit vector registers supported
14171422

1418-
#ifdef __INTEL_COMPILER
1423+
#ifdef USE_SVML_INTRINSICS
14191424
/*****************************************************************************
14201425
*
14211426
* 512-bit vector functions using Intel compiler intrinsic functions
@@ -1540,12 +1545,15 @@ static inline Vec8d cospi(Vec8d const x) { // cosine
15401545
static inline Vec16f tanpi(Vec16f const x) { // tangent
15411546
return _mm512_tanpi_ps(x);
15421547
}
1543-
/*
1548+
15441549
static inline Vec8d tanpi(Vec8d const x) { // tangent
1545-
// bug in compiler intrinsic? expecting argument __m512, should be __m512d
1550+
#ifdef __INTEL_COMPILER
1551+
// see https://community.intel.com/t5/Intel-C-Compiler/mm512-tanpi-pd-wrong-declaration/m-p/1404627
1552+
return _mm512_castps_pd(_mm512_tanpi_pd(_mm512_castpd_ps(x)));
1553+
#else
15461554
return _mm512_tanpi_pd(x);
1547-
} */
1548-
1555+
#endif
1556+
}
15491557
#endif // TRIGPI_FUNCTIONS
15501558

15511559
// inverse trigonometric functions
@@ -1647,7 +1655,7 @@ static inline Vec8d cdfnorminv(Vec8d const x) { // inverse cumulative normal di
16471655
return _mm512_cdfnorminv_pd(x);
16481656
}
16491657

1650-
#else // __INTEL_COMPILER
1658+
#else // USE_SVML_INTRINSICS
16511659
/*****************************************************************************
16521660
*
16531661
* 512-bit vector functions using a compiler other than Intel
@@ -1954,7 +1962,7 @@ static inline Vec8d cdfnorminv (Vec8d const x) { // inverse cumulative normal
19541962
return __svml_cdfnorminv8(x);
19551963
}
19561964

1957-
#endif // __INTEL_COMPILER
1965+
#endif // USE_SVML_INTRINSICS
19581966

19591967
#else // VECTORF512_H
19601968

0 commit comments

Comments
 (0)