Transform (x << (y&31)) -> (x << y). This takes advantage of the fact that the second operand (the shift count) of the x86 shift instructions is limited to the range 0 to 31 (or 0 to 63 in the x86-64 case), so the explicit mask is redundant.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55558 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2008-08-30 02:03:58 +00:00
parent dcf114e861
commit eb9f89287e
4 changed files with 140 additions and 0 deletions

View File

@ -2310,6 +2310,26 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(VT.getSizeInBits())))
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), c))
// iff (trunc c) == c
// Moving the AND below the TRUNCATE leaves the mask applied directly to the
// shift amount, which is the (shl x, (and amt, c)) shape that target
// patterns dropping a redundant shift-count mask are written against.
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
// N101 is the second operand of the AND; the fold applies only when it is
// a constant (the 'c' in the comment above).
SDValue N101 = N1.getOperand(0).getOperand(1);
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
if (N101C) {
// TruncVT is the result type of the TRUNCATE, i.e. the type of the
// shift-amount operand.
MVT TruncVT = N1.getValueType();
unsigned TruncBitSize = TruncVT.getSizeInBits();
// Work on a local copy of the constant's value.
APInt ShAmt = N101C->getAPIntValue();
// Only fold when truncating the constant to TruncVT leaves its value
// unchanged; otherwise the narrower mask would not be equivalent.
if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
// N100 is the first operand of the AND (the 'y' in the comment above).
SDValue N100 = N1.getOperand(0).getOperand(0);
// Rebuild as shl(N0, and(trunc(y), c)) with the AND done in TruncVT.
return DAG.getNode(ISD::SHL, VT, N0,
DAG.getNode(ISD::AND, TruncVT,
DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
DAG.getConstant(N101C->getValue(), TruncVT)));
}
}
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2)
@ -2421,6 +2441,26 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), c))
// iff (trunc c) == c
// Moving the AND below the TRUNCATE leaves the mask applied directly to the
// shift amount, which is the (sra x, (and amt, c)) shape that target
// patterns dropping a redundant shift-count mask are written against.
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
// N101 is the second operand of the AND; the fold applies only when it is
// a constant (the 'c' in the comment above).
SDValue N101 = N1.getOperand(0).getOperand(1);
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
if (N101C) {
// TruncVT is the result type of the TRUNCATE, i.e. the type of the
// shift-amount operand.
MVT TruncVT = N1.getValueType();
unsigned TruncBitSize = TruncVT.getSizeInBits();
// Work on a local copy of the constant's value.
APInt ShAmt = N101C->getAPIntValue();
// Only fold when truncating the constant to TruncVT leaves its value
// unchanged; otherwise the narrower mask would not be equivalent.
if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
// N100 is the first operand of the AND (the 'y' in the comment above).
SDValue N100 = N1.getOperand(0).getOperand(0);
// Rebuild as sra(N0, and(trunc(y), c)) with the AND done in TruncVT.
return DAG.getNode(ISD::SRA, VT, N0,
DAG.getNode(ISD::AND, TruncVT,
DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
DAG.getConstant(N101C->getValue(), TruncVT)));
}
}
}
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@ -2520,6 +2560,26 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT));
}
}
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), c))
// iff (trunc c) == c
// Moving the AND below the TRUNCATE leaves the mask applied directly to the
// shift amount, which is the (srl x, (and amt, c)) shape that target
// patterns dropping a redundant shift-count mask are written against.
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
// N101 is the second operand of the AND; the fold applies only when it is
// a constant (the 'c' in the comment above).
SDValue N101 = N1.getOperand(0).getOperand(1);
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
if (N101C) {
// TruncVT is the result type of the TRUNCATE, i.e. the type of the
// shift-amount operand.
MVT TruncVT = N1.getValueType();
unsigned TruncBitSize = TruncVT.getSizeInBits();
// Work on a local copy of the constant's value.
APInt ShAmt = N101C->getAPIntValue();
// Only fold when truncating the constant to TruncVT leaves its value
// unchanged; otherwise the narrower mask would not be equivalent.
if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
// N100 is the first operand of the AND (the 'y' in the comment above).
SDValue N100 = N1.getOperand(0).getOperand(0);
// Rebuild as srl(N0, and(trunc(y), c)) with the AND done in TruncVT.
return DAG.getNode(ISD::SRL, VT, N0,
DAG.getNode(ISD::AND, TruncVT,
DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
DAG.getConstant(N101C->getValue(), TruncVT)));
}
}
}
// fold operands of srl based on knowledge that the low bits are not
// demanded.

View File

@ -1341,6 +1341,22 @@ def : Pat<(i8 (trunc GR16:$src)),
// (shl x, 1) ==> (add x, x)
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// (shl x (and y, 63)) ==> (shl x, y)
// The same rewrite is applied to srl and sra below. An explicit mask of the
// CL shift count with 63 is redundant because the 64-bit shift instructions
// already limit the count to the range 0-63, so the AND is dropped and the
// plain shift-by-CL instruction is selected.
// Each operation has a register form and a memory form; the memory form
// matches a load from addr:$dst, the shift, and a store back to addr:$dst.

// Logical left shift.
def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
(SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
// Logical right shift.
def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
(SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
// Arithmetic right shift.
def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
(SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
def : Pat<(or (srl GR64:$src1, CL:$amt),
(shl GR64:$src2, (sub 64, CL:$amt))),

View File

@ -2849,6 +2849,46 @@ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
// (shl x (and y, 31)) ==> (shl x, y)
// The same rewrite is applied to srl and sra below. An explicit mask of the
// CL shift count with 31 is redundant because the x86 shift instructions
// already limit the count to the range 0-31, so the AND is dropped and the
// plain shift-by-CL instruction is selected. The mask value 31 is used for
// the 8-, 16- and 32-bit operand sizes alike.
// Each operation has register forms followed by memory forms; a memory form
// matches a load from addr:$dst, the shift, and a store back to addr:$dst.

// Logical left shift, register forms.
def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
(SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
(SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
(SHL32rCL GR32:$src1)>;
// Logical left shift, memory forms.
def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL32mCL addr:$dst)>;
// Logical right shift, register forms.
def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
(SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
(SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
(SHR32rCL GR32:$src1)>;
// Logical right shift, memory forms.
def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR32mCL addr:$dst)>;
// Arithmetic right shift, register forms.
def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
(SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
(SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
(SAR32rCL GR32:$src1)>;
// Arithmetic right shift, memory forms.
def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
def : Pat<(or (srl GR32:$src1, CL:$amt),
(shl GR32:$src2, (sub 32, CL:$amt))),

View File

@ -0,0 +1,24 @@
; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
; RUN: llvm-as < %s | llc -march=x86-64 | not grep and
; Register case: i32 left shift whose amount is explicitly masked with 31.
; The x86-64 RUN line requires that no `and` appears anywhere in the output,
; so this mask must not survive instruction selection there.
define i32 @t1(i32 %t, i32 %val) nounwind {
; Explicit mask of the shift amount to the range 0-31.
%shamt = and i32 %t, 31
%res = shl i32 %val, %shamt
ret i32 %res
}
; Memory case: i16 arithmetic right shift applied to the global @X in place
; (load, shift, store back to the same address), with the shift amount
; explicitly masked with 31.
@X = internal global i16 0
define void @t2(i16 %t) nounwind {
; Explicit mask of the shift amount to the range 0-31.
%shamt = and i16 %t, 31
%tmp = load i16* @X
%tmp1 = ashr i16 %tmp, %shamt
; The result is stored back to the address it was loaded from.
store i16 %tmp1, i16* @X
ret void
}
; 64-bit case: i64 logical right shift whose amount is masked with 63.
; The x86-64 RUN line requires that no `and` appears in the output at all.
; The 32-bit x86 RUN line allows exactly one `and` across the whole file;
; presumably that one comes from this function, since i64 is not a native
; register width there -- TODO confirm against the generated assembly.
define i64 @t3(i64 %t, i64 %val) nounwind {
; Explicit mask of the shift amount to the range 0-63.
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
}