[AggressiveInstCombine] Refactor foldLoadsRecursive to use m_ShlOrSelf #155176
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes: This patch was a part of #154375.
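For readers who have not seen the new matcher: per #154375, m_ShlOrSelf(Pattern, ShAmt) is a PatternMatch helper that matches either `shl Pattern, C` (capturing C into the uint64_t ShAmt) or Pattern itself (setting ShAmt to 0). That is what lets the separate shifted/unshifted alternatives in foldLoadsRecursive collapse into a single pattern and the APInt plumbing disappear. Below is a minimal standalone sketch of the matcher's behaviour; the signature is assumed from the diff that follows, and the module/function/value names are purely illustrative.

// Sketch only: demonstrates the assumed m_ShlOrSelf semantics from #154375.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("shlorself_demo", Ctx);
  IRBuilder<> B(Ctx);

  // Build `%ext = zext i8 %a to i32` and `%shl = shl i32 %ext, 8`.
  FunctionType *FTy =
      FunctionType::get(B.getInt32Ty(), {B.getInt8Ty()}, /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
  Value *Ext = B.CreateZExt(F->getArg(0), B.getInt32Ty(), "ext");
  Value *Shl = B.CreateShl(Ext, 8, "shl");
  B.CreateRet(Shl);

  uint64_t ShAmt = 99;
  Value *X;
  // Shifted form: matches the shl and captures ShAmt = 8.
  if (match(Shl, m_ShlOrSelf(m_ZExt(m_Value(X)), ShAmt)))
    errs() << "shl form, ShAmt = " << ShAmt << "\n";
  // Unshifted ("self") form: matches the zext directly and sets ShAmt = 0.
  if (match(Ext, m_ShlOrSelf(m_ZExt(m_Value(X)), ShAmt)))
    errs() << "self form, ShAmt = " << ShAmt << "\n";
  return 0;
}

Because ShAmt is written unconditionally in both forms, the patch can drop the separate Shift1/Shift2 zero-initialization that the APInt version needed.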
Full diff: https://github.com/llvm/llvm-project/pull/155176.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 40a7f8043034e..40de36d81ddd2 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -83,8 +83,8 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
// == (ShVal0 << ShAmt) | (ShVal1 >> (Width -ShAmt))
if (match(V, m_OneUse(m_c_Or(
m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
- m_LShr(m_Value(ShVal1),
- m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
+ m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
+ m_Deferred(ShAmt))))))) {
return Intrinsic::fshl;
}
@@ -617,7 +617,7 @@ struct LoadOps {
LoadInst *RootInsert = nullptr;
bool FoundRoot = false;
uint64_t LoadSize = 0;
- const APInt *Shift = nullptr;
+ uint64_t Shift = 0;
Type *ZextType;
AAMDNodes AATags;
};
@@ -627,17 +627,15 @@ struct LoadOps {
// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
AliasAnalysis &AA) {
- const APInt *ShAmt2 = nullptr;
+ uint64_t ShAmt2;
Value *X;
Instruction *L1, *L2;
// Go to the last node with loads.
- if (match(V, m_OneUse(m_c_Or(
- m_Value(X),
- m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
- m_APInt(ShAmt2)))))) ||
- match(V, m_OneUse(m_Or(m_Value(X),
- m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
+ if (match(V,
+ m_OneUse(m_c_Or(m_Value(X), m_OneUse(m_ShlOrSelf(
+ m_OneUse(m_ZExt(m_Instruction(L2))),
+ ShAmt2)))))) {
if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
// Avoid Partial chain merge.
return false;
@@ -646,11 +644,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
// Check if the pattern has loads
LoadInst *LI1 = LOps.Root;
- const APInt *ShAmt1 = LOps.Shift;
+ uint64_t ShAmt1 = LOps.Shift;
if (LOps.FoundRoot == false &&
- (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
- match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
- m_APInt(ShAmt1)))))) {
+ match(X, m_OneUse(
+ m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
LI1 = dyn_cast<LoadInst>(L1);
}
LoadInst *LI2 = dyn_cast<LoadInst>(L2);
@@ -726,13 +723,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (IsBigEndian)
std::swap(ShAmt1, ShAmt2);
- // Find Shifts values.
- uint64_t Shift1 = 0, Shift2 = 0;
- if (ShAmt1)
- Shift1 = ShAmt1->getZExtValue();
- if (ShAmt2)
- Shift2 = ShAmt2->getZExtValue();
-
// First load is always LI1. This is where we put the new load.
// Use the merged load size available from LI1 for forward loads.
if (LOps.FoundRoot) {
@@ -747,7 +737,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
uint64_t PrevSize =
DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
- if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+ if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
return false;
// Update LOps
@@ -824,7 +814,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
// Check if shift needed. We need to shift with the amount of load1
// shift if not zero.
if (LOps.Shift)
- NewOp = Builder.CreateShl(NewOp, ConstantInt::get(I.getContext(), *LOps.Shift));
+ NewOp = Builder.CreateShl(NewOp, LOps.Shift);
I.replaceAllUsesWith(NewOp);
return true;
@@ -860,11 +850,9 @@ static std::optional<PartStore> matchPartStore(Instruction &I,
return std::nullopt;
uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
- uint64_t ValOffset = 0;
+ uint64_t ValOffset;
Value *Val;
- if (!match(StoredVal, m_CombineOr(m_Trunc(m_LShr(m_Value(Val),
- m_ConstantInt(ValOffset))),
- m_Trunc(m_Value(Val)))))
+ if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
return std::nullopt;
Value *Ptr = Store->getPointerOperand();
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index f6716c817b739..46ec9e0a50842 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -101,6 +101,107 @@ define i32 @loadCombine_4consecutive(ptr %p) {
ret i32 %o3
}
+define i32 @loadCombine_4consecutive_commuted(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_commuted(
+; LE-NEXT: [[O3:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_commuted(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[S3]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[S4]], [[O1]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[E1]], [[O2]]
+; BE-NEXT: ret i32 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+ %e4 = zext i8 %l4 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 16
+ %s4 = shl i32 %e4, 24
+
+ %o1 = or i32 %s2, %s3
+ %o2 = or i32 %s4, %o1
+ %o3 = or i32 %e1, %o2
+ ret i32 %o3
+}
+
+define i32 @loadCombine_4consecutive_multiuse(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_multiuse(
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; LE-NEXT: [[O3:%.*]] = load i32, ptr [[P]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: call void @use(i8 [[L4]])
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_multiuse(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: call void @use(i8 [[L4]])
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+ call void @use(i8 %l4)
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+ %e4 = zext i8 %l4 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 16
+ %s4 = shl i32 %e4, 24
+
+ %o1 = or i32 %e1, %s2
+ %o2 = or i32 %o1, %s3
+ %o3 = or i32 %o2, %s4
+ ret i32 %o3
+}
+
define i32 @loadCombine_4consecutive_BE(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_BE(
; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
LGTM. I believe handling the multi-use case is fine, because GVN will later replace the smaller load with an extract from the larger load anyway.
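To make the multi-use remark concrete: the case in question is the one exercised by the new loadCombine_4consecutive_multiuse test, where the narrow load has an extra user. The refactored pattern accepts it because the m_OneUse that previously wrapped m_Instruction(L2) is gone, while the or, shl, and zext themselves are still required to be single-use; cleaning up the leftover narrow load is then left to GVN as described above. A minimal standalone sketch under those assumptions (same assumed m_ShlOrSelf signature as before; all names are illustrative):

// Sketch only: shows that the relaxed pattern tolerates a second user of the load.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("multiuse_demo", Ctx);
  IRBuilder<> B(Ctx);

  // f(ptr %p, i32 %rest): an i8 load from %p stands in for the narrow load,
  // %rest for the already-processed part of the or-chain.
  FunctionType *FTy = FunctionType::get(
      B.getInt32Ty(), {B.getPtrTy(), B.getInt32Ty()}, /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  Value *Narrow = B.CreateLoad(B.getInt8Ty(), F->getArg(0), "narrow");
  Value *Extra = B.CreateAdd(Narrow, B.getInt8(1), "extra"); // second user of the load
  Value *Ext = B.CreateZExt(Narrow, B.getInt32Ty(), "ext");
  Value *Shl = B.CreateShl(Ext, 24, "shl");
  Value *Or = B.CreateOr(F->getArg(1), Shl, "or");
  B.CreateRet(Or); // gives the or its single use, as the pattern requires

  uint64_t ShAmt = 0;
  Instruction *L2 = nullptr;
  Value *X = nullptr;
  // Same shape as the patch: no m_OneUse around m_Instruction(L2), so the
  // extra user of %narrow does not block the match.
  bool Matched = match(
      Or, m_OneUse(m_c_Or(m_Value(X),
                          m_OneUse(m_ShlOrSelf(
                              m_OneUse(m_ZExt(m_Instruction(L2))), ShAmt)))));
  errs() << (Matched ? "matched" : "no match") << ", ShAmt = " << ShAmt << "\n";
  (void)Extra;
  return 0;
}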
match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
                        m_APInt(ShAmt1)))))) {
match(X, m_OneUse(
             m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
As a follow-up, we could check whether we can relax more of these one-use checks.
This patch was a part of #154375.
Two functional changes: the unshifted zext operand is now matched commutatively within the or (covered by the new loadCombine_4consecutive_commuted test), and the one-use requirement on the load feeding the zext is dropped (covered by the new loadCombine_4consecutive_multiuse test).