diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d0a36100a26..bb9dde00766 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2310,6 +2310,26 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
                             APInt::getAllOnesValue(VT.getSizeInBits())))
     return DAG.getConstant(0, VT);
+  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), c))
+  // iff (trunc c) == c, i.e. the mask c still fits after truncation
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
+    if (N101C) {
+      MVT TruncVT = N1.getValueType();
+      unsigned TruncBitSize = TruncVT.getSizeInBits();
+      APInt ShAmt = N101C->getAPIntValue();
+      if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
+        SDValue N100 = N1.getOperand(0).getOperand(0);
+        return DAG.getNode(ISD::SHL, VT, N0,
+                           DAG.getNode(ISD::AND, TruncVT,
+                                       DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
+                                       DAG.getConstant(N101C->getValue(), TruncVT)));
+      }
+    }
+  }
+
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
   // fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2)
@@ -2421,6 +2441,26 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), c))
+  // iff (trunc c) == c, i.e. the mask c still fits after truncation
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
+    if (N101C) {
+      MVT TruncVT = N1.getValueType();
+      unsigned TruncBitSize = TruncVT.getSizeInBits();
+      APInt ShAmt = N101C->getAPIntValue();
+      if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
+        SDValue N100 = N1.getOperand(0).getOperand(0);
+        return DAG.getNode(ISD::SRA, VT, N0,
+                           DAG.getNode(ISD::AND, TruncVT,
+                                       DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
+                                       DAG.getConstant(N101C->getValue(), TruncVT)));
+      }
+    }
+  }
+
+  // Simplify, based on bits shifted out of the LHS.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
@@ -2520,6 +2560,26 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT));
     }
   }
+
+  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), c))
+  // iff (trunc c) == c, i.e. the mask c still fits after truncation
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101);
+    if (N101C) {
+      MVT TruncVT = N1.getValueType();
+      unsigned TruncBitSize = TruncVT.getSizeInBits();
+      APInt ShAmt = N101C->getAPIntValue();
+      if (ShAmt.trunc(TruncBitSize).getZExtValue() == N101C->getValue()) {
+        SDValue N100 = N1.getOperand(0).getOperand(0);
+        return DAG.getNode(ISD::SRL, VT, N0,
+                           DAG.getNode(ISD::AND, TruncVT,
+                                       DAG.getNode(ISD::TRUNCATE, TruncVT, N100),
+                                       DAG.getConstant(N101C->getValue(), TruncVT)));
+      }
+    }
+  }
 
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 239ae972302..07f6091d41a 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1341,6 +1341,22 @@ def : Pat<(i8 (trunc GR16:$src)),
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
+// (shl x, (and y, 63)) ==> (shl x, y); likewise for srl and sra below.
+def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
+          (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+          (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
+          (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+          (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
+          (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+          (SAR64mCL addr:$dst)>;
+
 // (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
 def : Pat<(or (srl GR64:$src1, CL:$amt),
               (shl GR64:$src2, (sub 64, CL:$amt))),
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index ee84fc19f11..110051c8de2 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2849,6 +2849,46 @@ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 
+// (shl x, (and y, 31)) ==> (shl x, y); likewise for srl and sra below.
+def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+          (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+          (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+          (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+          (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+          (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+          (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+          (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+          (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+          (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+          (SAR32mCL addr:$dst)>;
+
 // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
 def : Pat<(or (srl GR32:$src1, CL:$amt),
               (shl GR32:$src2, (sub 32, CL:$amt))),
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
new file mode 100644
index 00000000000..b6d78a48578
--- /dev/null
+++ b/test/CodeGen/X86/shift-and.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep and
+
+define i32 @t1(i32 %t, i32 %val) nounwind {
+  %shamt = and i32 %t, 31
+  %res = shl i32 %val, %shamt
+  ret i32 %res
+}
+
+@X = internal global i16 0
+
+define void @t2(i16 %t) nounwind {
+  %shamt = and i16 %t, 31
+  %tmp = load i16* @X
+  %tmp1 = ashr i16 %tmp, %shamt
+  store i16 %tmp1, i16* @X
+  ret void
+}
+
+define i64 @t3(i64 %t, i64 %val) nounwind {
+  %shamt = and i64 %t, 63
+  %res = lshr i64 %val, %shamt
+  ret i64 %res
+}