Skip to content

Commit 47c932f

Browse files
committed
std.simd: suggest more sensible vector sizes across the board
1 parent 19943f0 commit 47c932f

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

lib/std/simd.zig

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -23,20 +23,24 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
2323
} else if (cpu.arch.isArm()) {
2424
if (cpu.has(.arm, .neon)) break :blk 128;
2525
} else if (cpu.arch.isAARCH64()) {
26-
// SVE allows up to 2048 bits in the specification, as of 2022 the most powerful machine has implemented 512-bit
27-
// I think is safer to just be on 128 until is more common
28-
// TODO: Check on this return when bigger values are more common
29-
if (cpu.has(.aarch64, .sve)) break :blk 128;
26+
// NVIDIA Grace supports 128-bit SVE
27+
// AWS Graviton3 supports 256-bit SVE
28+
// Fujitsu A64FX supports 512-bit SVE
29+
// -> 256-bit seems like a good default for now.
30+
if (cpu.has(.aarch64, .sve)) break :blk 256;
3031
if (cpu.has(.aarch64, .neon)) break :blk 128;
31-
} else if (cpu.arch.isPowerPC()) {
32-
if (cpu.has(.powerpc, .altivec)) break :blk 128;
32+
} else if (cpu.arch == .hexagon) {
33+
if (cpu.has(.hexagon, .hvx_length64b)) break :blk 512;
34+
if (cpu.has(.hexagon, .hvx)) break :blk 1024;
35+
} else if (cpu.arch.isLoongArch()) {
36+
if (cpu.has(.loongarch, .lasx)) break :blk 256;
37+
if (cpu.has(.loongarch, .lsx)) break :blk 128;
3338
} else if (cpu.arch.isMIPS()) {
3439
if (cpu.has(.mips, .msa)) break :blk 128;
35-
// TODO: Test MIPS capability to handle bigger vectors
36-
// In theory MDMX and by extension mips3d have 32 registers of 64 bits which can use in parallel
37-
// for multiple processing, but I don't know what's optimal here, if using
38-
// the 2048 bits or using just 64 per vector or something in between
3940
if (cpu.has(.mips, .mips3d)) break :blk 64;
41+
} else if (cpu.arch.isPowerPC()) {
42+
if (cpu.has(.powerpc, .vsx)) break :blk 128;
43+
if (cpu.has(.powerpc, .altivec)) break :blk 128;
4044
} else if (cpu.arch.isRISCV()) {
4145
// In RISC-V, vector registers are length-agnostic, so there's no good way to determine the best size.
4246
// The usual vector length in most RISC-V CPUs is 256 bits; however, it can reach multiple kB.
@@ -60,12 +64,12 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
6064

6165
break :blk 256;
6266
}
67+
} else if (cpu.arch == .s390x) {
68+
if (cpu.has(.s390x, .vector)) break :blk 128;
6369
} else if (cpu.arch.isSPARC()) {
64-
// TODO: Test Sparc capability to handle bigger vectors
65-
// In theory Sparc have 32 registers of 64 bits which can use in parallel
66-
// for multiple processing, but I don't know what's optimal here, if using
67-
// the 2048 bits or using just 64 per vector or something in between
6870
if (cpu.hasAny(.sparc, &.{ .vis, .vis2, .vis3 })) break :blk 64;
71+
} else if (cpu.arch == .ve) {
72+
if (cpu.has(.ve, .vpu)) break :blk 2048;
6973
} else if (cpu.arch.isWasm()) {
7074
if (cpu.has(.wasm, .simd128)) break :blk 128;
7175
}

0 commit comments

Comments
 (0)