Skip to content

Commit b6a897a

Browse files
committed
[VPlan] Consolidate logic for narrowing to single scalars
The logic for narrowing recipes to single scalars lives in two different places: narrowToSingleScalarRecipes and legalizeAndOptimizeInductions. Consolidate it in narrowToSingleScalarRecipes, with minor test changes.
1 parent 67c6604 commit b6a897a

File tree

7 files changed

+28
-48
lines changed

7 files changed

+28
-48
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -631,30 +631,6 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
631631
if (!PhiR)
632632
continue;
633633

634-
// Try to narrow wide and replicating recipes to uniform recipes, based on
635-
// VPlan analysis.
636-
// TODO: Apply to all recipes in the future, to replace legacy uniformity
637-
// analysis.
638-
auto Users = collectUsersRecursively(PhiR);
639-
for (VPUser *U : reverse(Users)) {
640-
auto *Def = dyn_cast<VPSingleDefRecipe>(U);
641-
auto *RepR = dyn_cast<VPReplicateRecipe>(U);
642-
// Skip recipes that shouldn't be narrowed.
643-
if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
644-
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
645-
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
646-
continue;
647-
648-
// Skip recipes that may have other lanes than their first used.
649-
if (!vputils::isSingleScalar(Def) && !vputils::onlyFirstLaneUsed(Def))
650-
continue;
651-
652-
auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
653-
Def->operands(), /*IsUniform*/ true);
654-
Clone->insertAfter(Def);
655-
Def->replaceAllUsesWith(Clone);
656-
}
657-
658634
// Replace wide pointer inductions which have only their scalars used by
659635
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
660636
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1239,10 +1215,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
12391215
continue;
12401216

12411217
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
1242-
// Skip recipes that aren't single scalars or don't have only their
1243-
// scalar results used. In the latter case, we would introduce extra
1244-
// broadcasts.
1218+
// Skip recipes that aren't single scalars, that don't have users, or
1219+
// that don't have only their scalar results used (narrowing the latter
1220+
// would introduce extra broadcasts).
12451221
if (!vputils::isSingleScalar(RepOrWidenR) ||
1222+
RepOrWidenR->getNumUsers() == 0 ||
12461223
any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
12471224
return !U->usesScalars(RepOrWidenR);
12481225
}))

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
4040
/// Returns true if \p VPV is a single scalar, either because it produces the
4141
/// same value for all lanes or only has its first lane used.
4242
inline bool isSingleScalar(const VPValue *VPV) {
43+
if (onlyFirstLaneUsed(VPV))
44+
return true;
45+
4346
auto PreservesUniformity = [](unsigned Opcode) -> bool {
4447
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
4548
return true;

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
5757
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
5858
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
5959
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
60-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
61-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
60+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
6261
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
62+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
6363
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
64-
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
64+
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
6565
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6666
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
6767
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
5757
; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -2
5858
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
5959
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
60-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
61-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
60+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
6261
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
62+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
6363
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
64-
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
64+
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
6565
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6666
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
6767
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,11 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
240240
; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 3
241241
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
242242
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
243-
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
244-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
243+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP3]], i64 0
245244
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
245+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
246246
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
247-
; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
247+
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[TMP5]], align 8
248248
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
249249
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
250250
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
7979
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
8080
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
8181
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
82-
; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
83-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
82+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
8483
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
84+
; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
8585
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
86-
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
86+
; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
8787
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
8888
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
8989
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -159,11 +159,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
159159
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
160160
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
161161
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
162-
; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
163-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
162+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
164163
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
164+
; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
165165
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
166-
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
166+
; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
167167
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
168168
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
169169
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -182,11 +182,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
182182
; VF4-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
183183
; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
184184
; VF4-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
185-
; VF4-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
186-
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
185+
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
187186
; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
187+
; VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
188188
; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
189-
; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
189+
; VF4-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP4]], align 8
190190
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
191191
; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
192192
; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
115115
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
116116
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
117117
; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
118-
; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
119-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
118+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
120119
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
120+
; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
121121
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
122-
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
122+
; VF2-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP6]], align 8
123123
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
124124
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
125125
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]

0 commit comments

Comments
 (0)