@@ -10,62 +10,41 @@ struct Hex<T>(T);
10
10
"# ;
11
11
12
12
pub const LANE_FUNCTION_HELPERS : & str = r#"
13
- int mm512_extract(__m512i m, int vec_len, int bit_len, int index) {
14
- int lane_len = 128;
15
- int max_major_index = vec_len / lane_len;
16
- int max_minor_index = lane_len / bit_len;
13
+ typedef float float16_t;
14
+ typedef float float32_t;
15
+ typedef double float64_t;
17
16
18
- int major_index = index / max_major_index;
19
- int minor_index = index % max_minor_index;
17
+ #define __int64 long long
20
18
21
- __m128i lane = _mm512_extracti64x2_epi64(m, major_index);
19
+ #define mm512_extract_intrinsic_test_epi8(m, lane) \
20
+ _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
22
21
23
- switch(bit_len){
24
- case 8:
25
- return _mm_extract_epi8(lane, minor_index);
26
- case 16:
27
- return _mm_extract_epi16(lane, minor_index);
28
- case 32:
29
- return _mm_extract_epi32(lane, minor_index);
30
- case 64:
31
- return _mm_extract_epi64(lane, minor_index);
32
- }
33
- }
22
+ #define mm512_extract_intrinsic_test_epi16(m, lane) \
23
+ _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8)
34
24
35
- int _mm512_extract_intrinsic_test_epi8(__m512i m, int lane) {
36
- return mm512_extract(m, 512, 8, lane)
37
- }
25
+ #define mm512_extract_intrinsic_test_epi32(m, lane) \
26
+ _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4)
38
27
39
- int _mm512_extract_intrinsic_test_epi16(__m512i m, int lane) {
40
- return mm512_extract(m, 512, 16, lane)
41
- }
28
+ #define mm512_extract_intrinsic_test_epi64(m, lane) \
29
+ _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
42
30
43
- int mm512_extract_intrinsic_test_epi16(__m512i m, int lane) {
44
- return mm512_extract(m, 512, 16, lane)
45
- }
31
+ #define mm64_extract_intrinsic_test_epi8(m, lane) \
32
+ ((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF)
46
33
47
- int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) {
48
- return mm512_extract(m, 512, 64, lane)
49
- }
50
-
51
- int mm64_extract_intrinsic_test_epi8(__m64 m, int lane) {
52
- int real_lane_shift = lane / 2;
53
- int real_bit_shift = (lane % 2) * 8;
54
- int result = _mm_extract_pi16(m, lane / 2);
55
- return (result >> real_bit_shift);
56
- }
57
-
58
- int mm64_extract_intrinsic_test_epi32(__m64 m, int lane) {
59
- int bit_shift_amount = lane * 32;
60
- return _m_to_int(m >> bit_shift_amount);
61
- }
34
+ // Shift the 64-bit vector right by (lane) * 32 bits, then convert the result to a 32-bit int.
+ #define mm64_extract_intrinsic_test_epi32(m, lane) \
35
+ _mm_cvtsi64_si32(_mm_srli_si64((m), (lane) * 32))
62
36
"# ;
63
37
64
38
pub const X86_CONFIGURATIONS : & str = r#"
39
+ #![cfg_attr(target_arch = "x86", feature(avx))]
40
+ #![cfg_attr(target_arch = "x86", feature(sse))]
41
+ #![cfg_attr(target_arch = "x86", feature(sse2))]
65
42
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))]
66
43
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
67
44
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
68
45
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
46
+ #![cfg_attr(target_arch = "x86_64", feature(sse))]
47
+ #![cfg_attr(target_arch = "x86_64", feature(sse2))]
69
48
#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
70
49
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
71
50
#![feature(fmt_helpers_for_derive)]
0 commit comments