
Commit 1e5ff11

aarch64: Use canonical RTL representation for SVE2 XAR and extend it to fixed-width modes
The MD pattern for the XAR instruction in SVE2 is currently expressed with
non-canonical RTL by using a ROTATERT code with a constant rotate amount.
Fix it by using the left ROTATE code.  This necessitates splitting out the
expander separately, to translate the immediate coming from the intrinsic
from a right-rotate to a left-rotate immediate.

Additionally, as the SVE2 XAR instruction is unpredicated and can handle all
element sizes from .b to .d, it is a good fit for implementing the XOR+ROTATE
operation for Advanced SIMD modes where the TARGET_SHA3 one cannot be used
(it can only handle V2DImode operands).  Therefore let's extend the accepted
modes of the SVE2 pattern to include the Advanced SIMD integer modes.

This leads some tests for the svxar* intrinsics to fail because they now
simplify to a plain EOR when the rotate amount is the width of the element.
This simplification is desirable (EOR instructions have better or equal
throughput than XAR, and they are non-destructive of their input), so the
tests are adjusted.

For V2DImode XAR operations we should prefer the Advanced SIMD version when
it is available (TARGET_SHA3) because it is non-destructive, so restrict the
SVE2 pattern accordingly.  Tests are added to confirm this.

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for mainline?

Signed-off-by: Kyrylo Tkachov <[email protected]>

gcc/
	* config/aarch64/iterators.md (SVE_ASIMD_FULL_I): New mode iterator.
	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_xar<mode>): Use
	SVE_ASIMD_FULL_I modes.  Use ROTATE code for the rotate step.
	Adjust output logic.
	* config/aarch64/aarch64-sve-builtins-sve2.cc (svxar_impl): Define.
	(svxar): Use the above.

gcc/testsuite/
	* gcc.target/aarch64/xar_neon_modes.c: New test.
	* gcc.target/aarch64/xar_v2di_nonsve.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s16.c: Scan for EOR rather
	than XAR.
	* gcc.target/aarch64/sve2/acle/asm/xar_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u8.c: Likewise.
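As background for the immediate translation described above: rotating an
N-bit value right by r is the same as rotating it left by N - r.  A minimal
scalar sketch in C of one 16-bit XAR lane (illustrative only; xar16 and
rol16 are hypothetical helpers, not code from this patch):

#include <stdint.h>
#include <assert.h>

/* Scalar model of one 16-bit lane of XAR: EOR the inputs, then rotate
   the result right by ROT, with 0 < ROT <= 16.  */
static uint16_t xar16 (uint16_t a, uint16_t b, unsigned rot)
{
  uint16_t x = a ^ b;
  return (uint16_t) ((x >> (rot % 16)) | (x << ((16 - rot) % 16)));
}

/* Rotate left by AMT, with AMT < 16.  */
static uint16_t rol16 (uint16_t x, unsigned amt)
{
  return (uint16_t) ((x << amt) | (x >> ((16 - amt) % 16)));
}

int main (void)
{
  /* ror (x, r) == rol (x, 16 - r): exactly the immediate translation
     the new expander performs.  */
  for (unsigned r = 1; r < 16; r++)
    assert (xar16 (0x1234, 0x00ff, r) == rol16 (0x1234 ^ 0x00ff, 16 - r));
  return 0;
}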
1 parent de2bc6a commit 1e5ff11

13 files changed: +191 −59 lines changed

gcc/config/aarch64/aarch64-sve-builtins-sve2.cc

Lines changed: 17 additions & 1 deletion

@@ -108,6 +108,22 @@ class svaba_impl : public function_base
   }
 };
 
+class svxar_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    /* aarch64_sve2_xar represents this operation with a left-rotate RTX.
+       Convert the right-rotate amount from the intrinsic to fit this.  */
+    machine_mode mode = e.vector_mode (0);
+    HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
+			- INTVAL (e.args[2]);
+    e.args[2] = aarch64_simd_gen_const_vector_dup (mode, rot);
+    return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
+  }
+};
+
 class svcdot_impl : public function_base
 {
 public:

@@ -795,6 +811,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
 FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
 FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
 FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
-FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+FUNCTION (svxar, svxar_impl,)
 
 } /* end namespace aarch64_sve */
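A hedged sketch of how this expander is reached from the ACLE intrinsic
(hypothetical user code, assuming SVE2 is enabled, e.g. with
-march=armv8-a+sve2; not a file from this patch):

#include <arm_sve.h>

/* svxar takes a *right*-rotate immediate.  With svxar_impl above, the
   amount 2 on .s elements is rewritten to the left-rotate amount
   32 - 2 = 30 before matching the @aarch64_sve2_xar<mode> pattern.  */
svuint32_t xor_ror2 (svuint32_t a, svuint32_t b)
{
  return svxar_n_u32 (a, b, 2);
}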

gcc/config/aarch64/aarch64-sve2.md

Lines changed: 20 additions & 10 deletions

@@ -1266,18 +1266,28 @@
 ;; - XAR
 ;; -------------------------------------------------------------------------
 
+;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
+;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
+;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
+;; version should be preferred when available as it is non-destructive on its
+;; input.
 (define_insn "@aarch64_sve2_xar<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-	(rotatert:SVE_FULL_I
-	  (xor:SVE_FULL_I
-	    (match_operand:SVE_FULL_I 1 "register_operand")
-	    (match_operand:SVE_FULL_I 2 "register_operand"))
-	  (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
-  "TARGET_SVE2"
-  {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
-     [ w        , %0 , w ; *              ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
-     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
+  [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
+	(rotate:SVE_ASIMD_FULL_I
+	  (xor:SVE_ASIMD_FULL_I
+	    (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
+	    (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
+	  (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
+  "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
+  {
+    operands[3]
+      = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
+		 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
+    if (which_alternative == 0)
+      return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
+    return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
   }
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; -------------------------------------------------------------------------
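To illustrate the Advanced SIMD extension (a sketch under assumptions, not
one of the tests added by the commit; it assumes the shift pair below is
recognized as a rotate and that SVE2 is enabled):

#include <arm_neon.h>

/* XOR then rotate-left by 11 on 16-bit lanes.  The TARGET_SHA3 XAR only
   handles 64-bit lanes, so a V8HImode XOR+rotate previously needed
   separate instructions; with the widened pattern above it can be emitted
   as a single SVE2 XAR on the low 128 bits of the Z registers (hence the
   %Z output modifiers).  */
uint16x8_t xor_rol11 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t x = veorq_u16 (a, b);
  return vorrq_u16 (vshlq_n_u16 (x, 11), vshrq_n_u16 (x, 5));
}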

gcc/config/aarch64/iterators.md

Lines changed: 3 additions & 0 deletions

@@ -446,6 +446,9 @@
 ;; All fully-packed SVE integer vector modes.
 (define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])
 
+;; All fully-packed SVE integer and Advanced SIMD integer modes.
+(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
+
 ;; All fully-packed SVE floating-point vector modes.
 (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
 
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s16_untied, svint16_t,
 
 /*
 ** xar_16_s16_tied1:
-**	xar	z0\.h, z0\.h, z1\.h, #16
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
 
 /*
 ** xar_16_s16_tied2:
-**	xar	z0\.h, z0\.h, z1\.h, #16
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
 /*
 ** xar_16_s16_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.h, z0\.h, z2\.h, #16
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.h, z0\.h, z1\.h, #16
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */
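The same reasoning applies to all of the adjusted scan files below: a rotate
by the full element width is the identity, so an XAR whose immediate equals
the element size folds to a plain EOR, and since EOR is commutative the
scans accept either operand order.  A hypothetical illustration (not a
testsuite file):

#include <arm_sve.h>

/* Rotating a 16-bit lane by 16 is a no-op, so this XAR is semantically
   just an EOR of A and B, which is what the compiler now emits.  */
svint16_t xar_by_lane_width (svint16_t a, svint16_t b)
{
  return svxar_n_s16 (a, b, 16);
}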

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s32_untied, svint32_t,
 
 /*
 ** xar_32_s32_tied1:
-**	xar	z0\.s, z0\.s, z1\.s, #32
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
 
 /*
 ** xar_32_s32_tied2:
-**	xar	z0\.s, z0\.s, z1\.s, #32
+** (
+**	eor	z0\.d, z0\.d, z1\.d
+** |
+**	eor	z0\.d, z1\.d, z0\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
 /*
 ** xar_32_s32_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.s, z0\.s, z2\.s, #32
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.s, z0\.s, z1\.s, #32
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s64_untied, svint64_t,
 
 /*
 ** xar_64_s64_tied1:
-**	xar	z0\.d, z0\.d, z1\.d, #64
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
 
 /*
 ** xar_64_s64_tied2:
-**	xar	z0\.d, z0\.d, z1\.d, #64
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
 /*
 ** xar_64_s64_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.d, z0\.d, z2\.d, #64
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.d, z0\.d, z1\.d, #64
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s8_untied, svint8_t,
 
 /*
 ** xar_8_s8_tied1:
-**	xar	z0\.b, z0\.b, z1\.b, #8
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
 
 /*
 ** xar_8_s8_tied2:
-**	xar	z0\.b, z0\.b, z1\.b, #8
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
 /*
 ** xar_8_s8_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.b, z0\.b, z2\.b, #8
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.b, z0\.b, z1\.b, #8
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u16_untied, svuint16_t,
 
 /*
 ** xar_16_u16_tied1:
-**	xar	z0\.h, z0\.h, z1\.h, #16
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
 
 /*
 ** xar_16_u16_tied2:
-**	xar	z0\.h, z0\.h, z1\.h, #16
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
 /*
 ** xar_16_u16_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.h, z0\.h, z2\.h, #16
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.h, z0\.h, z1\.h, #16
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u32_untied, svuint32_t,
 
 /*
 ** xar_32_u32_tied1:
-**	xar	z0\.s, z0\.s, z1\.s, #32
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
 
 /*
 ** xar_32_u32_tied2:
-**	xar	z0\.s, z0\.s, z1\.s, #32
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
 /*
 ** xar_32_u32_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.s, z0\.s, z2\.s, #32
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.s, z0\.s, z1\.s, #32
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */

gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c

Lines changed: 12 additions & 6 deletions

@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u64_untied, svuint64_t,
 
 /*
 ** xar_64_u64_tied1:
-**	xar	z0\.d, z0\.d, z1\.d, #64
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,

@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
 
 /*
 ** xar_64_u64_tied2:
-**	xar	z0\.d, z0\.d, z1\.d, #64
+** (
+**	eor	z0\.d, z1\.d, z0\.d
+** |
+**	eor	z0\.d, z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,

@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
 /*
 ** xar_64_u64_untied:
 ** (
-**	movprfx	z0, z1
-**	xar	z0\.d, z0\.d, z2\.d, #64
+**	eor	z0\.d, z1\.d, z2\.d
 ** |
-**	movprfx	z0, z2
-**	xar	z0\.d, z0\.d, z1\.d, #64
+**	eor	z0\.d, z2\.d, z1\.d
 ** )
 **	ret
 */
