diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7da7848930f..0447e3b828e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4655,7 +4655,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.DeleteNode(Trunc); } // Replace the uses of SRL with SETCC - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4663,6 +4664,56 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. + SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode()) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } return SDValue(); } @@ -5012,7 +5063,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DEBUG(dbgs() << "\nReplacing.6 "; + DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e88af4f3e02..2762ce8d57c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1775,7 +1775,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { EVT ExtSrcTy = cast(N0.getOperand(1))->getVT(); unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); EVT ExtDstTy = N0.getValueType(); @@ -1809,22 +1809,21 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return N0; - + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - + if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa(N0.getOperand(1)) && @@ -1847,9 +1846,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOperand(0), N0.getOperand(1)); } + return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } + } else if (N1C->getAPIntValue() == 1 && + (VT == MVT::i1 || + getBooleanContents() == ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } else if (Op0.getOpcode() == ISD::AND && + isa(Op0.getOperand(1)) && + cast(Op0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. + if (Op0.getValueType() != VT) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index eb0a995432e..b0a1bff04c1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5883,26 +5883,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. -static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5952,6 +5957,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. + if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast(Op1)->getZExtValue() == 1 || + cast(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) diff --git a/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll new file mode 100644 index 00000000000..6a58e9e5518 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32* @t() nounwind optsize ssp { +entry: +; CHECK: t: +; CHECK: testl %eax, %eax +; CHECK: js + %cmp = icmp slt i32 undef, 0 ; [#uses=1] + %outsearch.0 = select i1 %cmp, i1 false, i1 true ; [#uses=1] + br i1 %outsearch.0, label %if.then27, label %if.else29 + +if.then27: ; preds = %entry + ret i32* undef + +if.else29: ; preds = %entry + unreachable +} + diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index 4fe554de75a..f29cbf323e3 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31 +; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 627edc51c18..8c1c4097660 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep mov | count 5 +; RUN: llc < %s -march=x86-64 | grep mov | count 3 %struct.COMPOSITE = type { i8, i16, i16 } %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index bfab1aef901..60620841064 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -3,13 +3,14 @@ ; value and as the operand of a branch. ; RUN: llc < %s -march=x86 | FileCheck %s -define i1 @test1(i32 %X) zeroext { +define i1 @test1(i32 %X) zeroext nounwind { %Y = trunc i32 %X to i1 ret i1 %Y } +; CHECK: test1: ; CHECK: andl $1, %eax -define i1 @test2(i32 %val, i32 %mask) { +define i1 @test2(i32 %val, i32 %mask) nounwind { entry: %shifted = ashr i32 %val, %mask %anded = and i32 %shifted, 1 @@ -20,9 +21,10 @@ ret_true: ret_false: ret i1 false } -; CHECK: testb $1, %al +; CHECK: test2: +; CHECK: btl %eax -define i32 @test3(i8* %ptr) { +define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr %tmp = trunc i8 %val to i1 br i1 %tmp, label %cond_true, label %cond_false @@ -31,9 +33,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test3: +; CHECK: testb $1, (%eax) -define i32 @test4(i8* %ptr) { +define i32 @test4(i8* %ptr) nounwind { %tmp = ptrtoint i8* %ptr to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -41,9 +44,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test4: +; CHECK: testb $1, 4(%esp) -define i32 @test6(double %d) { +define i32 @test5(double %d) nounwind { %tmp = fptosi double %d to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -51,4 +55,5 @@ cond_true: cond_false: ret i32 42 } +; CHECK: test5: ; CHECK: testb $1 diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll index a6bdb13ec6b..2d75c5d7620 100644 --- a/test/CodeGen/X86/xor-icmp.ll +++ b/test/CodeGen/X86/xor-icmp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 ; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; rdar://7367229 define i32 @t(i32 %a, i32 %b) nounwind ssp { entry: @@ -34,3 +35,33 @@ bb1: ; preds = %entry declare i32 @foo(...) declare i32 @bar(...) + +define i32 @t2(i32 %x, i32 %y) nounwind ssp { +; X32: t2: +; X32: cmpl +; X32: sete +; X32: cmpl +; X32: sete +; X32-NOT: xor +; X32: je + +; X64: t2: +; X64: testl +; X64: sete +; X64: testl +; X64: sete +; X64-NOT: xor +; X64: je +entry: + %0 = icmp eq i32 %x, 0 ; [#uses=1] + %1 = icmp eq i32 %y, 0 ; [#uses=1] + %2 = xor i1 %1, %0 ; [#uses=1] + br i1 %2, label %bb, label %return + +bb: ; preds = %entry + %3 = tail call i32 (...)* @foo() nounwind ; [#uses=0] + ret i32 undef + +return: ; preds = %entry + ret i32 undef +}