Skip to content

Commit e903362

Browse files
feat: correcting errors with generated C artifacts
1 parent 8efed65 commit e903362

File tree

7 files changed

+50
-72
lines changed

7 files changed

+50
-72
lines changed

crates/intrinsic-test/src/common/argument.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ where
108108
for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
109109
writeln!(
110110
w,
111-
"{indentation}const {ty} {name}_vals[] = {values};",
111+
"{indentation}alignas(64) const {ty} {name}_vals[] = {values};",
112112
ty = arg.ty.c_scalar_type(),
113113
name = arg.name,
114114
values = arg.ty.populate_random(indentation, loads, &Language::C)

crates/intrinsic-test/src/common/gen_c.rs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
4747
let ty = current.ty.c_type();
4848

4949
writeln!(w, "{indentation}{{")?;
50-
writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?;
50+
writeln!(w, "{body_indentation}const {ty} {} = {i};", current.name)?;
5151

5252
generate_c_constraint_blocks(
5353
w,
@@ -103,14 +103,11 @@ pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
103103
writeln!(w, "#include <{header}>")?;
104104
}
105105

106+
writeln!(w, "{}", forward_declarations)?;
107+
106108
writeln!(
107109
w,
108110
r#"
109-
#include <iostream>
110-
#include <cstring>
111-
#include <iomanip>
112-
#include <sstream>
113-
114111
template<typename T1, typename T2> T1 cast(T2 x) {{
115112
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
116113
T1 ret{{}};
@@ -120,13 +117,9 @@ template<typename T1, typename T2> T1 cast(T2 x) {{
120117
121118
std::ostream& operator<<(std::ostream& os, float16_t value);
122119
123-
124-
125120
"#
126121
)?;
127122

128-
writeln!(w, "{}", forward_declarations)?;
129-
130123
for intrinsic in intrinsics {
131124
create_c_test_function(w, intrinsic)?;
132125
}
@@ -137,12 +130,13 @@ std::ostream& operator<<(std::ostream& os, float16_t value);
137130
pub fn write_main_cpp<'a>(
138131
w: &mut impl std::io::Write,
139132
arch_specific_definitions: &str,
133+
arch_specific_headers: &[&str],
140134
intrinsics: impl Iterator<Item = &'a str> + Clone,
141135
) -> std::io::Result<()> {
142136
writeln!(w, "#include <iostream>")?;
143137
writeln!(w, "#include <string>")?;
144138

145-
for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] {
139+
for header in arch_specific_headers {
146140
writeln!(w, "#include <{header}>")?;
147141
}
148142

crates/intrinsic-test/src/common/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ pub trait SupportedArchitectureTest {
9797
write_main_cpp(
9898
&mut file,
9999
Self::PLATFORM_C_DEFINITIONS,
100+
Self::PLATFORM_C_HEADERS,
100101
self.intrinsics().iter().map(|i| i.name.as_str()),
101102
)
102103
.unwrap();

crates/intrinsic-test/src/x86/compile.rs

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
66

77
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
88
let mut command = CompilationCommandBuilder::new()
9-
.add_arch_flags([
10-
"avx",
11-
"avx2",
12-
"avx512f",
13-
"avx512cd",
14-
"avx512dq",
15-
"avx512vl",
16-
"avx512bw",
17-
"avx512bf16",
18-
"avx512bitalg",
19-
"lzcnt",
20-
"popcnt",
21-
"adx",
22-
"aes",
23-
])
9+
.add_arch_flags(["icelake-client"])
2410
.set_compiler(cpp_compiler)
2511
.set_target(&config.target)
2612
.set_opt_level("2")

crates/intrinsic-test/src/x86/config.rs

Lines changed: 21 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,62 +10,41 @@ struct Hex<T>(T);
1010
"#;
1111

1212
pub const LANE_FUNCTION_HELPERS: &str = r#"
13-
int mm512_extract(__m512i m, int vec_len, int bit_len, int index) {
14-
int lane_len = 128;
15-
int max_major_index = vec_len / lane_len;
16-
int max_minor_index = lane_len / bit_len;
13+
typedef float float16_t;
14+
typedef float float32_t;
15+
typedef double float64_t;
1716
18-
int major_index = index / max_major_index;
19-
int minor_index = index % max_minor_index;
17+
#define __int64 long long
2018
21-
__m128i lane = _mm512_extracti64x2_epi64(m, major_index);
19+
#define mm512_extract_intrinsic_test_epi8(m, lane) \
20+
_mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
2221
23-
switch(bit_len){
24-
case 8:
25-
return _mm_extract_epi8(lane, minor_index);
26-
case 16:
27-
return _mm_extract_epi16(lane, minor_index);
28-
case 32:
29-
return _mm_extract_epi32(lane, minor_index);
30-
case 64:
31-
return _mm_extract_epi64(lane, minor_index);
32-
}
33-
}
22+
#define mm512_extract_intrinsic_test_epi16(m, lane) \
23+
_mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8)
3424
35-
int _mm512_extract_intrinsic_test_epi8(__m512i m, int lane) {
36-
return mm512_extract(m, 512, 8, lane)
37-
}
25+
#define mm512_extract_intrinsic_test_epi32(m, lane) \
26+
_mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4)
3827
39-
int _mm512_extract_intrinsic_test_epi16(__m512i m, int lane) {
40-
return mm512_extract(m, 512, 16, lane)
41-
}
28+
#define mm512_extract_intrinsic_test_epi64(m, lane) \
29+
_mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
4230
43-
int mm512_extract_intrinsic_test_epi16(__m512i m, int lane) {
44-
return mm512_extract(m, 512, 16, lane)
45-
}
31+
#define mm64_extract_intrinsic_test_epi8(m, lane) \
32+
((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF)
4633
47-
int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) {
48-
return mm512_extract(m, 512, 64, lane)
49-
}
50-
51-
int mm64_extract_intrinsic_test_epi8(__m64 m, int lane) {
52-
int real_lane_shift = lane / 2;
53-
int real_bit_shift = (lane % 2) * 8;
54-
int result = _mm_extract_pi16(m, lane / 2);
55-
return (result >> real_bit_shift);
56-
}
57-
58-
int mm64_extract_intrinsic_test_epi32(__m64 m, int lane) {
59-
int bit_shift_amount = lane * 32;
60-
return _m_to_int(m >> bit_shift_amount);
61-
}
34+
#define mm64_extract_intrinsic_test_epi32(m, lane) \
35+
_mm_cvtsi64_si32(_mm_srli_si64(m, (lane) * 32))
6236
"#;
6337

6438
pub const X86_CONFIGURATIONS: &str = r#"
39+
#![cfg_attr(target_arch = "x86", feature(avx))]
40+
#![cfg_attr(target_arch = "x86", feature(sse))]
41+
#![cfg_attr(target_arch = "x86", feature(sse2))]
6542
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))]
6643
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
6744
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
6845
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
46+
#![cfg_attr(target_arch = "x86_64", feature(sse))]
47+
#![cfg_attr(target_arch = "x86_64", feature(sse2))]
6948
#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
7049
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
7150
#![feature(fmt_helpers_for_derive)]

crates/intrinsic-test/src/x86/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,17 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
3535

3636
const NOTICE: &str = config::NOTICE;
3737

38-
const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"];
38+
const PLATFORM_C_HEADERS: &[&str] = &[
39+
"immintrin.h",
40+
"iostream",
41+
"cstring",
42+
"iomanip",
43+
"sstream",
44+
"cstddef",
45+
"cstdint",
46+
];
3947
const PLATFORM_C_DEFINITIONS: &str = config::LANE_FUNCTION_HELPERS;
40-
const PLATFORM_C_FORWARD_DECLARATIONS: &str = "";
48+
const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::LANE_FUNCTION_HELPERS;
4149

4250
const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF;
4351
const PLATFORM_RUST_CFGS: &str = config::X86_CONFIGURATIONS;

crates/intrinsic-test/src/x86/types.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,17 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
110110
.filter(|c| c.is_numeric())
111111
.join("")
112112
.replace("128", "");
113-
format!("_mm{type_val_filtered}_set1_epi64")
113+
{
114+
if type_value.ends_with("d") {
115+
format!("_mm{type_val_filtered}_loadu_pd")
116+
} else if type_value.ends_with("h") {
117+
format!("_mm{type_val_filtered}_loadu_ph")
118+
} else if type_value.ends_with("i") {
119+
format!("_mm{type_val_filtered}_loadu_epi16")
120+
} else {
121+
format!("_mm{type_val_filtered}_loadu_ps")
122+
}
123+
}
114124
} else {
115125
// if it is a pointer, then rely on type conversion
116126
// If it is not any of the above type (__int<num>, __bfloat16, unsigned short, etc)

0 commit comments

Comments
 (0)