mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-01 17:32:36 +00:00
Prevent a DAGCombine from firing where there are two uses of a combined-away node and the result of the combine isn't substantially smaller than the input; it's just canonicalized. This is the first part of a significant (7%) performance gain for Snappy's hot decompression loop.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147604 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1e141a854e
commit
62dfc51152
@ -3331,7 +3331,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
|
||||
|
||||
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
|
||||
// (and (srl x, (sub c1, c2)), MASK)
|
||||
if (N1C && N0.getOpcode() == ISD::SRL &&
|
||||
// Only fold this if the inner shift has no other uses -- if it does, folding
|
||||
// this will increase the total number of instructions.
|
||||
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
|
||||
N0.getOperand(1).getOpcode() == ISD::Constant) {
|
||||
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
|
||||
if (c1 < VT.getSizeInBits()) {
|
||||
|
@ -48,3 +48,23 @@ entry:
|
||||
%tmp512 = lshr i32 %tmp4, 24
|
||||
ret i32 %tmp512
|
||||
}
|
||||
|
||||
define i64 @test5(i16 %i, i32* %arr) {
|
||||
; Ensure that we don't fold away shifts which have multiple uses, as they are
|
||||
; just re-introduced for the second use.
|
||||
; CHECK: test5:
|
||||
; CHECK-NOT: shrl
|
||||
; CHECK: shrl $11
|
||||
; CHECK-NOT: shrl
|
||||
; CHECK: ret
|
||||
|
||||
entry:
|
||||
%i.zext = zext i16 %i to i32
|
||||
%index = lshr i32 %i.zext, 11
|
||||
%index.zext = zext i32 %index to i64
|
||||
%val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
|
||||
%val = load i32* %val.ptr
|
||||
%val.zext = zext i32 %val to i64
|
||||
%sum = add i64 %val.zext, %index.zext
|
||||
ret i64 %sum
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user