From b4b8b0cc901ce41a0fb025885897d022eab390f2 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 18 Mar 2011 02:34:11 +0000 Subject: [PATCH] Add a target-specific branchless method for double-width relational comparisons on x86. Essentially, the way this works is that SUB+SBB sets the relevant flags the same way a double-width CMP would. This is a substantial improvement over the generic lowering in LLVM. The output is also shorter than the gcc-generated output; I haven't done any detailed benchmarking, though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 77 ++++++++++++++++++++++++++++-- test/CodeGen/X86/long-setcc.ll | 39 ++++++++++----- test/CodeGen/X86/sext-i1.ll | 3 +- 3 files changed, 103 insertions(+), 16 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a4d01a10a72..976da17f2f0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -446,12 +446,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); + setOperationAction(ISD::SETCC , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); - setOperationAction(ISD::SETCC , MVT::i64 , Custom); + setOperationAction(ISD::SETCC , MVT::i128 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); @@ -2839,7 +2840,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { // X < 0 -> X == 0, jump on sign. 
return X86::COND_S; - } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { + } else if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, RHS.getValueType()); return X86::COND_LE; @@ -7422,7 +7423,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && + if (isTypeLegal(Op0.getValueType()) && + Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { @@ -7434,7 +7436,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if (Op1.getOpcode() == ISD::Constant && - (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + (cast<ConstantSDNode>(Op1)->isOne() || cast<ConstantSDNode>(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { @@ -7457,6 +7459,73 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (X86CC == X86::COND_INVALID) return SDValue(); + if ((!Subtarget->is64Bit() && Op0.getValueType() == MVT::i64) || + (Subtarget->is64Bit() && Op0.getValueType() == MVT::i128)) { + switch (X86CC) { + case X86::COND_E: + case X86::COND_NE: + case X86::COND_S: + case X86::COND_NS: + // Just use the generic lowering, which works well on x86. + return SDValue(); + case X86::COND_B: + case X86::COND_AE: + case X86::COND_L: + case X86::COND_GE: + // Use SBB-based lowering. + break; + case X86::COND_A: + // Use SBB-based lowering; commute so ZF isn't used. + X86CC = X86::COND_B; + std::swap(Op0, Op1); + break; + case X86::COND_BE: + // Use SBB-based lowering; commute so ZF isn't used. + X86CC = X86::COND_AE; + std::swap(Op0, Op1); + break; + case X86::COND_G: + // Use SBB-based lowering; commute so ZF isn't used. 
+ X86CC = X86::COND_L; + std::swap(Op0, Op1); + break; + case X86::COND_LE: + // Use SBB-based lowering; commute so ZF isn't used. + X86CC = X86::COND_GE; + std::swap(Op0, Op1); + break; + default: + assert(0 && "Unexpected X86CC."); + return SDValue(); + } + MVT HalfType = getPointerTy(); + // FIXME: Refactor this code out to implement ISD::SADDO and friends. + SDValue Op0Low = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType, + Op0, DAG.getIntPtrConstant(0)); + SDValue Op1Low = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType, + Op1, DAG.getIntPtrConstant(0)); + SDValue Op0High = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType, + Op0, DAG.getIntPtrConstant(1)); + SDValue Op1High = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType, + Op1, DAG.getIntPtrConstant(1)); + // Redirect some cases which will simplify to the generic expansion; + // X86ISD::SUB and X86ISD::SBB are not optimized well at the moment. + // FIXME: We really need to add DAGCombines for SUB/SBB/etc. + if (Op1Low.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op1Low)->isNullValue()) + return SDValue(); + if (Op0Low.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op0Low)->isAllOnesValue()) + return SDValue(); + SDValue res1, res2; + SDVTList VTList = DAG.getVTList(HalfType, MVT::i32); + res1 = DAG.getNode(X86ISD::SUB, dl, VTList, Op0Low, Op1Low).getValue(1); + res2 = DAG.getNode(X86ISD::SBB, dl, VTList, Op0High, Op1High, + res1).getValue(1); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), res2); + } + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), EFLAGS); diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll index e0165fb01b5..0ab81606882 100644 --- a/test/CodeGen/X86/long-setcc.ll +++ b/test/CodeGen/X86/long-setcc.ll @@ -1,18 +1,35 @@ -; RUN: llc < %s -march=x86 | grep cmp | count 1 -; RUN: llc < %s -march=x86 | grep shr | count 1 -; RUN: llc < %s -march=x86 | grep xor | count 1 +; 
RUN: llc < %s -march=x86 | FileCheck %s -define i1 @t1(i64 %x) nounwind { - %B = icmp slt i64 %x, 0 - ret i1 %B +; General case +define i1 @t1(i64 %x, i64 %y) nounwind { +; CHECK: @t1 +; CHECK: subl +; CHECK: sbbl +; CHECK: setl %al + %B = icmp slt i64 %x, %y + ret i1 %B } +; Some special cases define i1 @t2(i64 %x) nounwind { - %tmp = icmp ult i64 %x, 4294967296 - ret i1 %tmp +; CHECK: @t2 +; CHECK: shrl $31, %eax + %B = icmp slt i64 %x, 0 + ret i1 %B } -define i1 @t3(i32 %x) nounwind { - %tmp = icmp ugt i32 %x, -1 - ret i1 %tmp +define i1 @t3(i64 %x) nounwind { +; CHECK: @t3 +; CHECX: cmpl $0 +; CHECX: sete %al + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +} + +define i1 @t4(i64 %x) nounwind { +; CHECK: @t4 +; CHECX: cmpl $0 +; CHECX: setne %al + %tmp = icmp ugt i64 %x, 4294967295 + ret i1 %tmp } diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll index 574769b4308..26048693925 100644 --- a/test/CodeGen/X86/sext-i1.ll +++ b/test/CodeGen/X86/sext-i1.ll @@ -39,7 +39,8 @@ entry: ; 32: t3: ; 32: cmpl $1 ; 32: sbbl -; 32: cmpl +; 32: subl +; 32: sbbl ; 32: xorl ; 64: t3: