Skip to content

Commit de2bc6a

Browse files
committed
PR 117048: simplify-rtx: Simplify (X << C1) [+,^] (X >> C2) into ROTATE
simplify-rtx can transform (X << C1) | (X >> C2) into ROTATE (X, C1) when C1 + C2 == mode-width. But the transformation is also valid for PLUS and XOR. Indeed GIMPLE can also do the fold. Let's teach RTL to do it too. The motivating testcase for this is in AArch64 intrinsics: uint64x2_t G2(uint64x2_t a, uint64x2_t b) { uint64x2_t c = veorq_u64(a, b); return veorq_u64(vaddq_u64(c, c), vshrq_n_u64(c, 63)); } which I was hoping to fold to a single XAR (a ROTATE+XOR instruction) but GCC was failing to detect the rotate operation for two reasons: 1) The combination of the two arms of the expression is done under XOR rather than IOR that simplify-rtx currently supports. 2) The ASHIFT operation is actually a (PLUS X X) operation and thus is not detected as the LHS of the two arms we require. The patch fixes both issues. The analysis of the two arms of the rotation expression is factored out into a common helper, simplify_rotate_op, which is then used in the PLUS, XOR, IOR cases in simplify_binary_operation_1. The check-assembly testcase for this is added in the following patch because it needs some extra AArch64 backend work, but I've added self-tests in this patch to validate the transformation. Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Kyrylo Tkachov <[email protected]> PR target/117048 * simplify-rtx.cc (extract_ashift_operands_p): Define. (simplify_rotate_op): Likewise. (simplify_context::simplify_binary_operation_1): Use the above in the PLUS, IOR, XOR cases. (test_vector_rotate): Define. (test_vector_ops): Use the above.
1 parent d3d925b commit de2bc6a

File tree

1 file changed

+156
-48
lines changed

1 file changed

+156
-48
lines changed

gcc/simplify-rtx.cc

Lines changed: 156 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2820,6 +2820,104 @@ reverse_rotate_by_imm_p (machine_mode mode, unsigned int left, rtx op1)
28202820
return false;
28212821
}
28222822

2823+
/* Analyse argument X to see if it represents an (ASHIFT X Y) operation
2824+
and return the expression to be shifted in SHIFT_OPND and the shift amount
2825+
in SHIFT_AMNT. This is primarily used to group handling of ASHIFT (X, CST)
2826+
and (PLUS (X, X)) in one place. If the expression is not equivalent to an
2827+
ASHIFT then return FALSE and set SHIFT_OPND and SHIFT_AMNT to NULL. */
2828+
2829+
static bool
2830+
extract_ashift_operands_p (rtx x, rtx *shift_opnd, rtx *shift_amnt)
2831+
{
2832+
if (GET_CODE (x) == ASHIFT)
2833+
{
2834+
*shift_opnd = XEXP (x, 0);
2835+
*shift_amnt = XEXP (x, 1);
2836+
return true;
2837+
}
2838+
if (GET_CODE (x) == PLUS && rtx_equal_p (XEXP (x, 0), XEXP (x, 1)))
2839+
{
2840+
*shift_opnd = XEXP (x, 0);
2841+
*shift_amnt = CONST1_RTX (GET_MODE (x));
2842+
return true;
2843+
}
2844+
*shift_opnd = NULL_RTX;
2845+
*shift_amnt = NULL_RTX;
2846+
return false;
2847+
}
2848+
2849+
/* OP0 and OP1 are combined under an operation of mode MODE that can
2850+
potentially result in a ROTATE expression. Analyze the OP0 and OP1
2851+
and return the resulting ROTATE expression if so. Return NULL otherwise.
2852+
This is used in detecting the patterns (X << C1) [+,|,^] (X >> C2) where
2853+
C1 + C2 == GET_MODE_UNIT_PRECISION (mode).
2854+
(X << C1) and (C >> C2) would be OP0 and OP1. */
2855+
2856+
static rtx
2857+
simplify_rotate_op (rtx op0, rtx op1, machine_mode mode)
2858+
{
2859+
/* Convert (ior (ashift A CX) (lshiftrt A CY)) where CX+CY equals the
2860+
mode size to (rotate A CX). */
2861+
2862+
rtx opleft = simplify_rtx (op0);
2863+
rtx opright = simplify_rtx (op1);
2864+
rtx ashift_opnd, ashift_amnt;
2865+
/* In some cases the ASHIFT is not a direct ASHIFT. Look deeper and extract
2866+
the relevant operands here. */
2867+
bool ashift_op_p
2868+
= extract_ashift_operands_p (op1, &ashift_opnd, &ashift_amnt);
2869+
2870+
if (ashift_op_p
2871+
|| GET_CODE (op1) == SUBREG)
2872+
{
2873+
opleft = op1;
2874+
opright = op0;
2875+
}
2876+
else
2877+
{
2878+
opright = op1;
2879+
opleft = op0;
2880+
ashift_op_p
2881+
= extract_ashift_operands_p (opleft, &ashift_opnd, &ashift_amnt);
2882+
}
2883+
2884+
if (ashift_op_p && GET_CODE (opright) == LSHIFTRT
2885+
&& rtx_equal_p (ashift_opnd, XEXP (opright, 0)))
2886+
{
2887+
rtx leftcst = unwrap_const_vec_duplicate (ashift_amnt);
2888+
rtx rightcst = unwrap_const_vec_duplicate (XEXP (opright, 1));
2889+
2890+
if (CONST_INT_P (leftcst) && CONST_INT_P (rightcst)
2891+
&& (INTVAL (leftcst) + INTVAL (rightcst)
2892+
== GET_MODE_UNIT_PRECISION (mode)))
2893+
return gen_rtx_ROTATE (mode, XEXP (opright, 0), ashift_amnt);
2894+
}
2895+
2896+
/* Same, but for ashift that has been "simplified" to a wider mode
2897+
by simplify_shift_const. */
2898+
scalar_int_mode int_mode, inner_mode;
2899+
2900+
if (GET_CODE (opleft) == SUBREG
2901+
&& is_a <scalar_int_mode> (mode, &int_mode)
2902+
&& is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (opleft)),
2903+
&inner_mode)
2904+
&& GET_CODE (SUBREG_REG (opleft)) == ASHIFT
2905+
&& GET_CODE (opright) == LSHIFTRT
2906+
&& GET_CODE (XEXP (opright, 0)) == SUBREG
2907+
&& known_eq (SUBREG_BYTE (opleft), SUBREG_BYTE (XEXP (opright, 0)))
2908+
&& GET_MODE_SIZE (int_mode) < GET_MODE_SIZE (inner_mode)
2909+
&& rtx_equal_p (XEXP (SUBREG_REG (opleft), 0),
2910+
SUBREG_REG (XEXP (opright, 0)))
2911+
&& CONST_INT_P (XEXP (SUBREG_REG (opleft), 1))
2912+
&& CONST_INT_P (XEXP (opright, 1))
2913+
&& (INTVAL (XEXP (SUBREG_REG (opleft), 1))
2914+
+ INTVAL (XEXP (opright, 1))
2915+
== GET_MODE_PRECISION (int_mode)))
2916+
return gen_rtx_ROTATE (int_mode, XEXP (opright, 0),
2917+
XEXP (SUBREG_REG (opleft), 1));
2918+
return NULL_RTX;
2919+
}
2920+
28232921
/* Subroutine of simplify_binary_operation. Simplify a binary operation
28242922
CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or
28252923
OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
@@ -2831,7 +2929,7 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
28312929
rtx op0, rtx op1,
28322930
rtx trueop0, rtx trueop1)
28332931
{
2834-
rtx tem, reversed, opleft, opright, elt0, elt1;
2932+
rtx tem, reversed, elt0, elt1;
28352933
HOST_WIDE_INT val;
28362934
scalar_int_mode int_mode, inner_mode;
28372935
poly_int64 offset;
@@ -3030,6 +3128,11 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
30303128
return
30313129
simplify_gen_unary (NEG, mode, reversed, mode);
30323130

3131+
/* Convert (plus (ashift A CX) (lshiftrt A CY)) where CX+CY equals the
3132+
mode size to (rotate A CX). */
3133+
if ((tem = simplify_rotate_op (op0, op1, mode)))
3134+
return tem;
3135+
30333136
/* If one of the operands is a PLUS or a MINUS, see if we can
30343137
simplify this by the associative law.
30353138
Don't use the associative law for floating point.
@@ -3462,53 +3565,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
34623565
return op1;
34633566

34643567
/* Convert (ior (ashift A CX) (lshiftrt A CY)) where CX+CY equals the
3465-
mode size to (rotate A CX). */
3466-
3467-
if (GET_CODE (op1) == ASHIFT
3468-
|| GET_CODE (op1) == SUBREG)
3469-
{
3470-
opleft = op1;
3471-
opright = op0;
3472-
}
3473-
else
3474-
{
3475-
opright = op1;
3476-
opleft = op0;
3477-
}
3478-
3479-
if (GET_CODE (opleft) == ASHIFT && GET_CODE (opright) == LSHIFTRT
3480-
&& rtx_equal_p (XEXP (opleft, 0), XEXP (opright, 0)))
3481-
{
3482-
rtx leftcst = unwrap_const_vec_duplicate (XEXP (opleft, 1));
3483-
rtx rightcst = unwrap_const_vec_duplicate (XEXP (opright, 1));
3484-
3485-
if (CONST_INT_P (leftcst) && CONST_INT_P (rightcst)
3486-
&& (INTVAL (leftcst) + INTVAL (rightcst)
3487-
== GET_MODE_UNIT_PRECISION (mode)))
3488-
return gen_rtx_ROTATE (mode, XEXP (opright, 0), XEXP (opleft, 1));
3489-
}
3490-
3491-
/* Same, but for ashift that has been "simplified" to a wider mode
3492-
by simplify_shift_const. */
3493-
3494-
if (GET_CODE (opleft) == SUBREG
3495-
&& is_a <scalar_int_mode> (mode, &int_mode)
3496-
&& is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (opleft)),
3497-
&inner_mode)
3498-
&& GET_CODE (SUBREG_REG (opleft)) == ASHIFT
3499-
&& GET_CODE (opright) == LSHIFTRT
3500-
&& GET_CODE (XEXP (opright, 0)) == SUBREG
3501-
&& known_eq (SUBREG_BYTE (opleft), SUBREG_BYTE (XEXP (opright, 0)))
3502-
&& GET_MODE_SIZE (int_mode) < GET_MODE_SIZE (inner_mode)
3503-
&& rtx_equal_p (XEXP (SUBREG_REG (opleft), 0),
3504-
SUBREG_REG (XEXP (opright, 0)))
3505-
&& CONST_INT_P (XEXP (SUBREG_REG (opleft), 1))
3506-
&& CONST_INT_P (XEXP (opright, 1))
3507-
&& (INTVAL (XEXP (SUBREG_REG (opleft), 1))
3508-
+ INTVAL (XEXP (opright, 1))
3509-
== GET_MODE_PRECISION (int_mode)))
3510-
return gen_rtx_ROTATE (int_mode, XEXP (opright, 0),
3511-
XEXP (SUBREG_REG (opleft), 1));
3568+
mode size to (rotate A CX). */
3569+
tem = simplify_rotate_op (op0, op1, mode);
3570+
if (tem)
3571+
return tem;
35123572

35133573
/* If OP0 is (ashiftrt (plus ...) C), it might actually be
35143574
a (sign_extend (plus ...)). Then check if OP1 is a CONST_INT and
@@ -3838,6 +3898,12 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
38383898
return tem;
38393899
}
38403900

3901+
/* Convert (xor (ashift A CX) (lshiftrt A CY)) where CX+CY equals the
3902+
mode size to (rotate A CX). */
3903+
tem = simplify_rotate_op (op0, op1, mode);
3904+
if (tem)
3905+
return tem;
3906+
38413907
/* Convert (xor (and (not A) B) A) into A | B. */
38423908
if (GET_CODE (op0) == AND
38433909
&& GET_CODE (XEXP (op0, 0)) == NOT
@@ -8676,6 +8742,46 @@ test_vec_merge (machine_mode mode)
86768742
simplify_rtx (nvm));
86778743
}
86788744

8745+
/* Test that vector rotate formation works at RTL level. Try various
8746+
combinations of (REG << C) [|,^,+] (REG >> (<bitwidth> - C)). */
8747+
8748+
static void
8749+
test_vector_rotate (rtx reg)
8750+
{
8751+
machine_mode mode = GET_MODE (reg);
8752+
unsigned bitwidth = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8753+
rtx plus_rtx = gen_rtx_PLUS (mode, reg, reg);
8754+
rtx lshftrt_amnt = GEN_INT (bitwidth - 1);
8755+
lshftrt_amnt = gen_const_vec_duplicate (mode, lshftrt_amnt);
8756+
rtx lshiftrt_rtx = gen_rtx_LSHIFTRT (mode, reg, lshftrt_amnt);
8757+
rtx rotate_rtx = gen_rtx_ROTATE (mode, reg, CONST1_RTX (mode));
8758+
/* Test explicitly the case where ASHIFT (x, 1) is a PLUS (x, x). */
8759+
ASSERT_RTX_EQ (rotate_rtx,
8760+
simplify_rtx (gen_rtx_IOR (mode, plus_rtx, lshiftrt_rtx)));
8761+
ASSERT_RTX_EQ (rotate_rtx,
8762+
simplify_rtx (gen_rtx_XOR (mode, plus_rtx, lshiftrt_rtx)));
8763+
ASSERT_RTX_EQ (rotate_rtx,
8764+
simplify_rtx (gen_rtx_PLUS (mode, plus_rtx, lshiftrt_rtx)));
8765+
8766+
/* Don't go through every possible rotate amount to save execution time.
8767+
Multiple of BITS_PER_UNIT amounts could conceivably be simplified to
8768+
other bswap operations sometimes. Go through just the odd amounts. */
8769+
for (unsigned i = 3; i < bitwidth - 2; i += 2)
8770+
{
8771+
rtx rot_amnt = gen_const_vec_duplicate (mode, GEN_INT (i));
8772+
rtx ashift_rtx = gen_rtx_ASHIFT (mode, reg, rot_amnt);
8773+
lshftrt_amnt = gen_const_vec_duplicate (mode, GEN_INT (bitwidth - i));
8774+
lshiftrt_rtx = gen_rtx_LSHIFTRT (mode, reg, lshftrt_amnt);
8775+
rotate_rtx = gen_rtx_ROTATE (mode, reg, rot_amnt);
8776+
ASSERT_RTX_EQ (rotate_rtx,
8777+
simplify_rtx (gen_rtx_IOR (mode, ashift_rtx, lshiftrt_rtx)));
8778+
ASSERT_RTX_EQ (rotate_rtx,
8779+
simplify_rtx (gen_rtx_XOR (mode, ashift_rtx, lshiftrt_rtx)));
8780+
ASSERT_RTX_EQ (rotate_rtx,
8781+
simplify_rtx (gen_rtx_PLUS (mode, ashift_rtx, lshiftrt_rtx)));
8782+
}
8783+
}
8784+
86798785
/* Test subregs of integer vector constant X, trying elements in
86808786
the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
86818787
where NELTS is the number of elements in X. Subregs involving
@@ -8847,11 +8953,13 @@ test_vector_ops ()
88478953
{
88488954
rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode));
88498955
test_vector_ops_duplicate (mode, scalar_reg);
8956+
rtx vector_reg = make_test_reg (mode);
88508957
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
88518958
&& maybe_gt (GET_MODE_NUNITS (mode), 2))
88528959
{
88538960
test_vector_ops_series (mode, scalar_reg);
88548961
test_vector_subregs (mode);
8962+
test_vector_rotate (vector_reg);
88558963
}
88568964
test_vec_merge (mode);
88578965
}

0 commit comments

Comments
 (0)