@@ -23,20 +23,24 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
23
23
} else if (cpu .arch .isArm ()) {
24
24
if (cpu .has (.arm , .neon )) break :blk 128 ;
25
25
} else if (cpu .arch .isAARCH64 ()) {
26
- // SVE allows up to 2048 bits in the specification, as of 2022 the most powerful machine has implemented 512-bit
27
- // I think is safer to just be on 128 until is more common
28
- // TODO: Check on this return when bigger values are more common
29
- if (cpu .has (.aarch64 , .sve )) break :blk 128 ;
26
+ // NVIDIA Grace supports 128-bit SVE
27
+ // AWS Graviton3 supports 256-bit SVE
28
+ // Fujitsu A64FX supports 512-bit SVE
29
+ // -> 256-bit seems like a good default for now.
30
+ if (cpu .has (.aarch64 , .sve )) break :blk 256 ;
30
31
if (cpu .has (.aarch64 , .neon )) break :blk 128 ;
31
- } else if (cpu .arch .isPowerPC ()) {
32
- if (cpu .has (.powerpc , .altivec )) break :blk 128 ;
32
+ } else if (cpu .arch == .hexagon ) {
33
+ if (cpu .has (.hexagon , .hvx_length64b )) break :blk 512 ;
34
+ if (cpu .has (.hexagon , .hvx )) break :blk 1024 ;
35
+ } else if (cpu .arch .isLoongArch ()) {
36
+ if (cpu .has (.loongarch , .lasx )) break :blk 256 ;
37
+ if (cpu .has (.loongarch , .lsx )) break :blk 128 ;
33
38
} else if (cpu .arch .isMIPS ()) {
34
39
if (cpu .has (.mips , .msa )) break :blk 128 ;
35
- // TODO: Test MIPS capability to handle bigger vectors
36
- // In theory MDMX and by extension mips3d have 32 registers of 64 bits which can use in parallel
37
- // for multiple processing, but I don't know what's optimal here, if using
38
- // the 2048 bits or using just 64 per vector or something in between
39
40
if (cpu .has (.mips , .mips3d )) break :blk 64 ;
41
+ } else if (cpu .arch .isPowerPC ()) {
42
+ if (cpu .has (.powerpc , .vsx )) break :blk 128 ;
43
+ if (cpu .has (.powerpc , .altivec )) break :blk 128 ;
40
44
} else if (cpu .arch .isRISCV ()) {
41
45
// In RISC-V, vector registers are length-agnostic, so there's no good way to determine the best size.
42
46
// The usual vector length in most RISC-V CPUs is 256 bits; however, it can reach multiple kB.
@@ -60,12 +64,12 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
60
64
61
65
break :blk 256 ;
62
66
}
67
+ } else if (cpu .arch == .s390x ) {
68
+ if (cpu .has (.s390x , .vector )) break :blk 128 ;
63
69
} else if (cpu .arch .isSPARC ()) {
64
- // TODO: Test Sparc capability to handle bigger vectors
65
- // In theory Sparc have 32 registers of 64 bits which can use in parallel
66
- // for multiple processing, but I don't know what's optimal here, if using
67
- // the 2048 bits or using just 64 per vector or something in between
68
70
if (cpu .hasAny (.sparc , &.{ .vis , .vis2 , .vis3 })) break :blk 64 ;
71
+ } else if (cpu .arch == .ve ) {
72
+ if (cpu .has (.ve , .vpu )) break :blk 2048 ;
69
73
} else if (cpu .arch .isWasm ()) {
70
74
if (cpu .has (.wasm , .simd128 )) break :blk 128 ;
71
75
}
0 commit comments