Skip to content

Commit 8b9f2aa

Browse files
committed
r339: improved SIMD detection
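The old code only queried CPUID leaf 1, so it did not check AVX2 (or AVX512F) correctly; those bits are now read from CPUID leaf 7.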
1 parent 46e8b6a commit 8b9f2aa

File tree

2 files changed

+40
-22
lines changed

2 files changed

+40
-22
lines changed

ksw2_dispatch.c

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,48 @@
55
// Feature bits, one per x86 SIMD extension; x86_simd() ORs them into the mask it returns.
#define SIMD_SSE 0x1
66
#define SIMD_SSE2 0x2
77
#define SIMD_SSE3 0x4
8-
#define SIMD_SSE4_1 0x8
9-
#define SIMD_SSE4_2 0x10
10-
#define SIMD_AVX 0x20
11-
#define SIMD_AVX2 0x40
12-
#define SIMD_AVX512F 0x80
8+
#define SIMD_SSSE3 0x8
9+
#define SIMD_SSE4_1 0x10
10+
#define SIMD_SSE4_2 0x20
11+
#define SIMD_AVX 0x40
12+
#define SIMD_AVX2 0x80
13+
#define SIMD_AVX512F 0x100
1314

14-
unsigned x86_simd(void)
15+
#ifndef _MSC_VER
16+
// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h
17+
// Run the CPUID instruction through inline asm. This definition is compiled
// only when _MSC_VER is not defined (see the enclosing #ifndef).
//   cpuid      - output: elements 0..3 receive EAX, EBX, ECX, EDX
//   func_id    - loaded into EAX before CPUID executes
//   subfunc_id - loaded into ECX before CPUID executes
void __cpuidex(int cpuid[4], int func_id, int subfunc_id)
1518
{
16-
unsigned eax, ebx, ecx, edx, flag = 0;
17-
#ifdef _MSC_VER
18-
int cpuid[4];
19-
__cpuid(cpuid, 1);
20-
eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3];
21-
#else
22-
asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (1));
19+
#if defined(__x86_64__)
20+
asm volatile ("cpuid"
21+
: "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
22+
: "0" (func_id), "2" (subfunc_id));
23+
#else // 32-bit: EBX cannot be used directly in PIC code, so it is swapped with another register around CPUID
24+
asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
25+
: "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
26+
: "0" (func_id), "2" (subfunc_id));
2327
#endif
24-
if (edx>>25&1) flag |= SIMD_SSE;
25-
if (edx>>26&1) flag |= SIMD_SSE2;
26-
if (ecx>>0 &1) flag |= SIMD_SSE3;
27-
if (ecx>>19&1) flag |= SIMD_SSE4_1;
28-
if (ecx>>20&1) flag |= SIMD_SSE4_2;
29-
if (ecx>>28&1) flag |= SIMD_AVX;
30-
if (ebx>>5 &1) flag |= SIMD_AVX2;
31-
if (ebx>>16&1) flag |= SIMD_AVX512F;
28+
}
29+
#endif
30+
31+
// Query CPUID and return a bitmask of SIMD_* flags for the extensions it reports.
// Returns 0 when CPUID leaf 0 reports a maximum leaf of 0.
// NOTE(review): only CPUID feature bits are tested; nothing here checks whether
// the OS has enabled the AVX/AVX-512 register state (OSXSAVE/XGETBV) - confirm
// that callers do not depend on that.
int x86_simd(void)
32+
{
33+
int flag = 0, cpuid[4], max_id;
34+
__cpuidex(cpuid, 0, 0); // leaf 0: the EAX result is used as the highest supported leaf
35+
max_id = cpuid[0];
36+
if (max_id == 0) return 0;
37+
__cpuidex(cpuid, 1, 0); // leaf 1: cpuid[3] is EDX, cpuid[2] is ECX
38+
if (cpuid[3]>>25&1) flag |= SIMD_SSE;
39+
if (cpuid[3]>>26&1) flag |= SIMD_SSE2;
40+
if (cpuid[2]>>0 &1) flag |= SIMD_SSE3;
41+
if (cpuid[2]>>9 &1) flag |= SIMD_SSSE3;
42+
if (cpuid[2]>>19&1) flag |= SIMD_SSE4_1;
43+
if (cpuid[2]>>20&1) flag |= SIMD_SSE4_2;
44+
if (cpuid[2]>>28&1) flag |= SIMD_AVX;
45+
if (max_id >= 7) { // leaf 7 is queried only when leaf 0 reported it as available
46+
__cpuidex(cpuid, 7, 0); // leaf 7, sub-leaf 0: cpuid[1] is EBX
47+
if (cpuid[1]>>5 &1) flag |= SIMD_AVX2;
48+
if (cpuid[1]>>16&1) flag |= SIMD_AVX512F;
49+
}
3250
return flag;
3351
}
3452

main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include "mmpriv.h"
77
#include "getopt.h"
88

9-
#define MM_VERSION "2.1-r338-dirty"
9+
#define MM_VERSION "2.1-r339-dirty"
1010

1111
#ifdef __linux__
1212
#include <sys/resource.h>

0 commit comments

Comments
 (0)