|
5 | 5 | #define SIMD_SSE 0x1 |
6 | 6 | #define SIMD_SSE2 0x2 |
7 | 7 | #define SIMD_SSE3 0x4 |
8 | | -#define SIMD_SSE4_1 0x8 |
9 | | -#define SIMD_SSE4_2 0x10 |
10 | | -#define SIMD_AVX 0x20 |
11 | | -#define SIMD_AVX2 0x40 |
12 | | -#define SIMD_AVX512F 0x80 |
| 8 | +#define SIMD_SSSE3 0x8 |
| 9 | +#define SIMD_SSE4_1 0x10 |
| 10 | +#define SIMD_SSE4_2 0x20 |
| 11 | +#define SIMD_AVX 0x40 |
| 12 | +#define SIMD_AVX2 0x80 |
| 13 | +#define SIMD_AVX512F 0x100 |
13 | 14 |
|
14 | | -unsigned x86_simd(void) |
| 15 | +#ifndef _MSC_VER |
| 16 | +// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h |
| 17 | +void __cpuidex(int cpuid[4], int func_id, int subfunc_id) |
15 | 18 | { |
16 | | - unsigned eax, ebx, ecx, edx, flag = 0; |
17 | | -#ifdef _MSC_VER |
18 | | - int cpuid[4]; |
19 | | - __cpuid(cpuid, 1); |
20 | | - eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3]; |
21 | | -#else |
22 | | - asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (1)); |
| 19 | +#if defined(__x86_64__) |
| 20 | + asm volatile ("cpuid" |
| 21 | + : "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3]) |
| 22 | + : "0" (func_id), "2" (subfunc_id)); |
| 23 | +#else // on 32bit, ebx can NOT be used as PIC code |
| 24 | + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" |
| 25 | + : "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3]) |
| 26 | + : "0" (func_id), "2" (subfunc_id)); |
23 | 27 | #endif |
24 | | - if (edx>>25&1) flag |= SIMD_SSE; |
25 | | - if (edx>>26&1) flag |= SIMD_SSE2; |
26 | | - if (ecx>>0 &1) flag |= SIMD_SSE3; |
27 | | - if (ecx>>19&1) flag |= SIMD_SSE4_1; |
28 | | - if (ecx>>20&1) flag |= SIMD_SSE4_2; |
29 | | - if (ecx>>28&1) flag |= SIMD_AVX; |
30 | | - if (ebx>>5 &1) flag |= SIMD_AVX2; |
31 | | - if (ebx>>16&1) flag |= SIMD_AVX512F; |
| 28 | +} |
| 29 | +#endif |
| 30 | + |
| 31 | +int x86_simd(void) |
| 32 | +{ |
| 33 | + int flag = 0, cpuid[4], max_id; |
| 34 | + __cpuidex(cpuid, 0, 0); |
| 35 | + max_id = cpuid[0]; |
| 36 | + if (max_id == 0) return 0; |
| 37 | + __cpuidex(cpuid, 1, 0); |
| 38 | + if (cpuid[3]>>25&1) flag |= SIMD_SSE; |
| 39 | + if (cpuid[3]>>26&1) flag |= SIMD_SSE2; |
| 40 | + if (cpuid[2]>>0 &1) flag |= SIMD_SSE3; |
| 41 | + if (cpuid[2]>>9 &1) flag |= SIMD_SSSE3; |
| 42 | + if (cpuid[2]>>19&1) flag |= SIMD_SSE4_1; |
| 43 | + if (cpuid[2]>>20&1) flag |= SIMD_SSE4_2; |
| 44 | + if (cpuid[2]>>28&1) flag |= SIMD_AVX; |
| 45 | + if (max_id >= 7) { |
| 46 | + __cpuidex(cpuid, 7, 0); |
| 47 | + if (cpuid[1]>>5 &1) flag |= SIMD_AVX2; |
| 48 | + if (cpuid[1]>>16&1) flag |= SIMD_AVX512F; |
| 49 | + } |
32 | 50 | return flag; |
33 | 51 | } |
34 | 52 |
|
|
0 commit comments