From 12bbc52aa7ad84a944f14757f7f6d77b9fa3188f Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 4 Mar 2009 02:33:24 +0000 Subject: [PATCH] Teach the x86 backend to eliminate "test" instructions by using the EFLAGS result from add, sub, inc, and dec instructions in simple cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66004 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 134 ++++++++++++++++++-- lib/Target/X86/X86ISelLowering.h | 14 ++- lib/Target/X86/X86Instr64bit.td | 80 +++++++----- lib/Target/X86/X86InstrInfo.td | 194 +++++++++++++++++------------ test/CodeGen/X86/peep-test.ll | 22 ++++ 5 files changed, 317 insertions(+), 127 deletions(-) create mode 100644 test/CodeGen/X86/peep-test.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c3a29b2a43a..2bac4e49b91 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5361,6 +5361,87 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit); } +/// Emit nodes that will be selected as "test Op0,Op0", or something +/// equivalent. +SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + + if (Op.getResNo() == 0) { + unsigned Opcode = 0; + switch (Op.getNode()->getOpcode()) { + case ISD::ADD: + // Due to an isel shortcoming, be conservative if this add is likely to + // be selected as part of a load-modify-store instruction. When the root + // node in a match is a store, isel doesn't know how to remap non-chain + // non-flag uses of other nodes in the match, such as the ADD in this + // case. This leads to the ADD being left around and reselected, with + // the result being two adds in the output. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() == ISD::STORE) + goto default_case; + // An add of one will be selected as an INC. + if (ConstantSDNode *C = + dyn_cast(Op.getNode()->getOperand(1))) + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + break; + } + // Otherwise use a regular EFLAGS-setting add. + Opcode = X86ISD::ADD; + break; + case ISD::SUB: + // Due to the ISEL shortcoming noted above, be conservative if this sub is + // likely to be selected as part of a load-modify-store instruction. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() == ISD::STORE) + goto default_case; + // A subtract of one will be selected as a DEC. + if (ConstantSDNode *C = + dyn_cast(Op.getNode()->getOperand(1))) + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::DEC; + break; + } + // Otherwise use a regular EFLAGS-setting sub. + Opcode = X86ISD::SUB; + break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::INC: + case X86ISD::DEC: + return SDValue(Op.getNode(), 1); + default: + default_case: + break; + } + if (Opcode != 0) { + const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::i32); + SmallVector Ops; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) + Ops.push_back(Op.getOperand(i)); + SDValue New = DAG.getNode(Opcode, dl, VTs, 2, &Ops[0], Ops.size()); + DAG.ReplaceAllUsesWith(Op, New); + return SDValue(New.getNode(), 1); + } + } + + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); +} + +/// Emit nodes that will be selected as "cmp Op0,Op1", or something +/// equivalent. 
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG) { + if (ConstantSDNode *C = dyn_cast(Op1)) + if (C->getAPIntValue() == 0) + return EmitTest(Op0, DAG); + + DebugLoc dl = Op0.getDebugLoc(); + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -5423,7 +5504,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); + SDValue Cond = EmitCmp(Op0, Op1, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); } @@ -5542,8 +5623,20 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. -static bool isX86LogicalCmp(unsigned Opc) { - return Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI; +static bool isX86LogicalCmp(SDValue Op) { + unsigned Opc = Op.getNode()->getOpcode(); + if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) + return true; + if (Op.getResNo() == 1 && + Opc == X86ISD::ADD || + Opc == X86ISD::SUB || + Opc == X86ISD::SMUL || + Opc == X86ISD::UMUL || + Opc == X86ISD::INC || + Opc == X86ISD::DEC) + return true; + + return false; } SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { @@ -5569,7 +5662,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { !isScalarFPTypeInSSEReg(VT)) // FPStack? IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); - if ((isX86LogicalCmp(Opc) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME + if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME Cond = Cmp; addTest = false; } @@ -5577,8 +5670,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond= DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond, - DAG.getConstant(0, MVT::i8)); + Cond = EmitTest(Cond, DAG); } const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), @@ -5646,7 +5738,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); // FIXME: WHY THE SPECIAL CASING OF LogicalCmp?? - if (isX86LogicalCmp(Opc) || Opc == X86ISD::BT) { + if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) { Cond = Cmp; addTest = false; } else { @@ -5665,13 +5757,12 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { SDValue Cmp = Cond.getOperand(0).getOperand(1); - unsigned Opc = Cmp.getOpcode(); if (CondOpc == ISD::OR) { // Also, recognize the pattern generated by an FCMP_UNE. We can emit // two branches instead of an explicit OR instruction with a // separate test. if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Opc)) { + isX86LogicalCmp(Cmp)) { CC = Cond.getOperand(0).getOperand(0); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); @@ -5686,7 +5777,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { // have a fall-through edge, because this requires an explicit // jmp when the condition is false. 
if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Opc) && + isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); @@ -5729,8 +5820,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond= DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond, - DAG.getConstant(0, MVT::i8)); + Cond = EmitTest(Cond, DAG); } return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); @@ -6663,6 +6753,14 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Unknown ovf instruction!"); case ISD::SADDO: + // A subtract of one will be selected as a INC. Note that INC doesn't + // set CF, so we can't do this for UADDO. + if (ConstantSDNode *C = dyn_cast(Op)) + if (C->getAPIntValue() == 1) { + BaseOp = X86ISD::INC; + Cond = X86::COND_O; + break; + } BaseOp = X86ISD::ADD; Cond = X86::COND_O; break; @@ -6671,6 +6769,14 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { Cond = X86::COND_B; break; case ISD::SSUBO: + // A subtract of one will be selected as a DEC. Note that DEC doesn't + // set CF, so we can't do this for USUBO. + if (ConstantSDNode *C = dyn_cast(Op)) + if (C->getAPIntValue() == 1) { + BaseOp = X86ISD::DEC; + Cond = X86::COND_O; + break; + } BaseOp = X86ISD::SUB; Cond = X86::COND_O; break; @@ -7011,6 +7117,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SUB: return "X86ISD::SUB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; + case X86ISD::INC: return "X86ISD::INC"; + case X86ISD::DEC: return "X86ISD::DEC"; } } @@ -7809,6 +7917,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case X86ISD::SUB: case X86ISD::SMUL: case X86ISD::UMUL: + case X86ISD::INC: + case X86ISD::DEC: // These nodes' second result is a boolean. if (Op.getResNo() == 0) break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 09332930a77..5d3f287a381 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -235,9 +235,9 @@ namespace llvm { PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, - // ADD, SUB, SMUL, UMUL - Arithmetic operations with overflow/carry - // intrinsics. - ADD, SUB, SMUL, UMUL + // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. + ADD, SUB, SMUL, UMUL, + INC, DEC }; } @@ -659,6 +659,14 @@ namespace llvm { MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, MachineBasicBlock *BB, unsigned cmovOpc) const; + + /// Emit nodes that will be selected as "test Op0,Op0", or something + /// equivalent. + SDValue EmitTest(SDValue Op0, SelectionDAG &DAG); + + /// Emit nodes that will be selected as "cmp Op0,Op1", or something + /// equivalent. 
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG); }; namespace X86 { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 38a16135cdf..1a3cf5f47c8 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1632,101 +1632,113 @@ def : Pat<(subc GR64:$src1, imm:$src2), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; //===----------------------------------------------------------------------===// -// Overflow Patterns +// EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// Register-Register Addition with Overflow -def : Pat<(parallel (X86add_ovf GR64:$src1, GR64:$src2), +// Register-Register Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), (ADD64rr GR64:$src1, GR64:$src2)>; -// Register-Integer Addition with Overflow -def : Pat<(parallel (X86add_ovf GR64:$src1, i64immSExt8:$src2), +// Register-Integer Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2), (implicit EFLAGS)), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86add_ovf GR64:$src1, i64immSExt32:$src2), +def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Register-Memory Addition with Overflow -def : Pat<(parallel (X86add_ovf GR64:$src1, (load addr:$src2)), +// Register-Memory Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (ADD64rm GR64:$src1, addr:$src2)>; -// Memory-Register Addition with Overflow -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR64:$src2), +// Memory-Register Addition with EFLAGS result +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2), addr:$dst), (implicit EFLAGS)), (ADD64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i64immSExt8:$src2), +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i64immSExt32:$src2), +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; -// Register-Register Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR64:$src1, GR64:$src2), +// Register-Register Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), (SUB64rr GR64:$src1, GR64:$src2)>; -// Register-Memory Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR64:$src1, (load addr:$src2)), +// Register-Memory Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (SUB64rm GR64:$src1, addr:$src2)>; -// Register-Integer Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR64:$src1, i64immSExt8:$src2), +// Register-Integer Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2), (implicit EFLAGS)), (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86sub_ovf GR64:$src1, i64immSExt32:$src2), +def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Memory-Register Subtraction with Overflow -def : Pat<(parallel (store (X86sub_ovf (load 
addr:$dst), GR64:$src2), +// Memory-Register Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2), addr:$dst), (implicit EFLAGS)), (SUB64mr addr:$dst, GR64:$src2)>; -// Memory-Integer Subtraction with Overflow -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i64immSExt8:$src2), +// Memory-Integer Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i64immSExt32:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; -// Register-Register Signed Integer Multiplication with Overflow -def : Pat<(parallel (X86smul_ovf GR64:$src1, GR64:$src2), +// Register-Register Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), (IMUL64rr GR64:$src1, GR64:$src2)>; -// Register-Memory Signed Integer Multiplication with Overflow -def : Pat<(parallel (X86smul_ovf GR64:$src1, (load addr:$src2)), +// Register-Memory Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (IMUL64rm GR64:$src1, addr:$src2)>; -// Register-Integer Signed Integer Multiplication with Overflow -def : Pat<(parallel (X86smul_ovf GR64:$src1, i64immSExt8:$src2), +// Register-Integer Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2), (implicit EFLAGS)), (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86smul_ovf GR64:$src1, i64immSExt32:$src2), +def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; -// Memory-Integer Signed Integer Multiplication with Overflow -def : Pat<(parallel (X86smul_ovf (load addr:$src1), i64immSExt8:$src2), +// Memory-Integer Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2), (implicit EFLAGS)), (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86smul_ovf (load addr:$src1), i64immSExt32:$src2), +def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), (implicit EFLAGS)), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; +// INC and DEC with EFLAGS result. Note that these do not set CF. 
+def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)), + (INC64r GR64:$src)>; +def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC64m addr:$dst)>; +def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)), + (DEC64r GR64:$src)>; +def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC64m addr:$dst)>; + //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 93f9e028022..e36fadadb91 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -27,11 +27,13 @@ def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; -def SDTUnaryArithOvf : SDTypeProfile<1, 1, - [SDTCisInt<0>]>; -def SDTBinaryArithOvf : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>]>; +// Unary and binary operator instructions that set EFLAGS as a side-effect. +def SDTUnaryArithWithFlags : SDTypeProfile<1, 1, + [SDTCisInt<0>]>; +def SDTBinaryArithWithFlags : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, @@ -148,10 +150,12 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInFlag]>; -def X86add_ovf : SDNode<"X86ISD::ADD", SDTBinaryArithOvf>; -def X86sub_ovf : SDNode<"X86ISD::SUB", SDTBinaryArithOvf>; -def X86smul_ovf : SDNode<"X86ISD::SMUL", SDTBinaryArithOvf>; -def X86umul_ovf : SDNode<"X86ISD::UMUL", SDTUnaryArithOvf>; +def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>; +def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; +def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; +def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; +def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. 
@@ -3486,217 +3490,251 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; //===----------------------------------------------------------------------===// -// Overflow Patterns +// EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// Register-Register Addition with Overflow -def : Pat<(parallel (X86add_ovf GR8:$src1, GR8:$src2), +// Register-Register Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), (ADD8rr GR8:$src1, GR8:$src2)>; -// Register-Register Addition with Overflow -def : Pat<(parallel (X86add_ovf GR16:$src1, GR16:$src2), +// Register-Register Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86add_ovf GR32:$src1, GR32:$src2), +def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2), (implicit EFLAGS)), (ADD32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Addition with Overflow -def : Pat<(parallel (X86add_ovf GR8:$src1, (load addr:$src2)), +// Register-Memory Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)), (implicit EFLAGS)), (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86add_ovf GR16:$src1, (load addr:$src2)), +def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)), (implicit EFLAGS)), (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86add_ovf GR32:$src1, (load addr:$src2)), +def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)), (implicit EFLAGS)), (ADD32rm GR32:$src1, addr:$src2)>; -// Register-Integer Addition with Overflow -def : Pat<(parallel (X86add_ovf GR8:$src1, imm:$src2), +// Register-Integer Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2), (implicit EFLAGS)), (ADD8ri GR8:$src1, imm:$src2)>; -// Register-Integer Addition with Overflow -def : Pat<(parallel (X86add_ovf GR16:$src1, imm:$src2), +// Register-Integer Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2), (implicit EFLAGS)), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86add_ovf GR32:$src1, imm:$src2), +def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2), (implicit EFLAGS)), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86add_ovf GR16:$src1, i16immSExt8:$src2), +def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2), (implicit EFLAGS)), (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86add_ovf GR32:$src1, i32immSExt8:$src2), +def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Addition with Overflow -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR8:$src2), +// Memory-Register Addition with EFLAGS result +def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2), addr:$dst), (implicit EFLAGS)), (ADD8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR16:$src2), +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2), addr:$dst), (implicit EFLAGS)), (ADD16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR32:$src2), +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2), addr:$dst), (implicit EFLAGS)), (ADD32mr addr:$dst, GR32:$src2)>; -def : Pat<(parallel (store (X86add_ovf (loadi8 
addr:$dst), imm:$src2), +def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (ADD8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_ovf (loadi16 addr:$dst), imm:$src2), +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (ADD16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_ovf (loadi32 addr:$dst), imm:$src2), +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (ADD32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i16immSExt8:$src2), +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (ADD16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i32immSExt8:$src2), +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (ADD32mi8 addr:$dst, i32immSExt8:$src2)>; -// Register-Register Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR8:$src1, GR8:$src2), +// Register-Register Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), (SUB8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86sub_ovf GR16:$src1, GR16:$src2), +def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86sub_ovf GR32:$src1, GR32:$src2), +def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2), (implicit EFLAGS)), (SUB32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR8:$src1, (load addr:$src2)), +// Register-Memory Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)), (implicit EFLAGS)), (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86sub_ovf GR16:$src1, (load addr:$src2)), +def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)), (implicit EFLAGS)), (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86sub_ovf GR32:$src1, (load addr:$src2)), +def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)), (implicit EFLAGS)), (SUB32rm GR32:$src1, addr:$src2)>; -// Register-Integer Subtraction with Overflow -def : Pat<(parallel (X86sub_ovf GR8:$src1, imm:$src2), +// Register-Integer Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2), (implicit EFLAGS)), (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_ovf GR16:$src1, imm:$src2), +def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2), (implicit EFLAGS)), (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_ovf GR32:$src1, imm:$src2), +def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2), (implicit EFLAGS)), (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_ovf GR16:$src1, i16immSExt8:$src2), +def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2), (implicit EFLAGS)), (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86sub_ovf GR32:$src1, i32immSExt8:$src2), +def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Subtraction with Overflow -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR8:$src2), +// Memory-Register Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2), 
addr:$dst), (implicit EFLAGS)), (SUB8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR16:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2), addr:$dst), (implicit EFLAGS)), (SUB16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR32:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2), addr:$dst), (implicit EFLAGS)), (SUB32mr addr:$dst, GR32:$src2)>; -// Memory-Integer Subtraction with Overflow -def : Pat<(parallel (store (X86sub_ovf (loadi8 addr:$dst), imm:$src2), +// Memory-Integer Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (SUB8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (loadi16 addr:$dst), imm:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (SUB16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (loadi32 addr:$dst), imm:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2), addr:$dst), (implicit EFLAGS)), (SUB32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i16immSExt8:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (SUB16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i32immSExt8:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (SUB32mi8 addr:$dst, i32immSExt8:$src2)>; -// Register-Register Signed Integer Multiply with Overflow -def : Pat<(parallel (X86smul_ovf GR16:$src1, GR16:$src2), +// Register-Register Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86smul_ovf GR32:$src1, GR32:$src2), +def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2), (implicit EFLAGS)), (IMUL32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Signed Integer Multiply with Overflow -def : Pat<(parallel (X86smul_ovf GR16:$src1, (load addr:$src2)), +// Register-Memory Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)), (implicit EFLAGS)), (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86smul_ovf GR32:$src1, (load addr:$src2)), +def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)), (implicit EFLAGS)), (IMUL32rm GR32:$src1, addr:$src2)>; -// Register-Integer Signed Integer Multiply with Overflow -def : Pat<(parallel (X86smul_ovf GR16:$src1, imm:$src2), +// Register-Integer Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2), (implicit EFLAGS)), (IMUL16rri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_ovf GR32:$src1, imm:$src2), +def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2), (implicit EFLAGS)), (IMUL32rri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_ovf GR16:$src1, i16immSExt8:$src2), +def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2), (implicit EFLAGS)), (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86smul_ovf GR32:$src1, i32immSExt8:$src2), +def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Integer Signed 
Integer Multiply with Overflow -def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2), +// Memory-Integer Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2), (implicit EFLAGS)), (IMUL16rmi addr:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2), +def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2), (implicit EFLAGS)), (IMUL32rmi addr:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_ovf (load addr:$src1), i16immSExt8:$src2), +def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2), (implicit EFLAGS)), (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86smul_ovf (load addr:$src1), i32immSExt8:$src2), +def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2), (implicit EFLAGS)), (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; -// Optimize multiple with overflow by 2. +// Optimize multiply by 2 with EFLAGS result. let AddedComplexity = 2 in { -def : Pat<(parallel (X86smul_ovf GR16:$src1, 2), +def : Pat<(parallel (X86smul_flag GR16:$src1, 2), (implicit EFLAGS)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(parallel (X86smul_ovf GR32:$src1, 2), +def : Pat<(parallel (X86smul_flag GR32:$src1, 2), (implicit EFLAGS)), (ADD32rr GR32:$src1, GR32:$src1)>; } +// INC and DEC with EFLAGS result. Note that these do not set CF. +def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), + (INC8r GR8:$src)>; +def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC8m addr:$dst)>; +def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), + (DEC8r GR8:$src)>; +def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC8m addr:$dst)>; + +def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), + (INC16r GR16:$src)>; +def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC16m addr:$dst)>; +def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), + (DEC16r GR16:$src)>; +def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC16m addr:$dst)>; + +def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), + (INC32r GR32:$src)>; +def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC32m addr:$dst)>; +def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), + (DEC32r GR32:$src)>; +def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC32m addr:$dst)>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/peep-test.ll b/test/CodeGen/X86/peep-test.ll new file mode 100644 index 00000000000..a95b5640443 --- /dev/null +++ b/test/CodeGen/X86/peep-test.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: not grep cmp %t +; RUN: not grep test %t + +define void @loop(i64 %n, double* nocapture %d) nounwind { +entry: + br label %bb + +bb: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %bb ] + %i.03 = add i64 %indvar, %n + %0 = getelementptr double* %d, i64 %i.03 + %1 = load double* %0, align 8 + %2 = mul double %1, 3.000000e+00 + store double %2, double* %0, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp 
eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %bb + +return: + ret void +}
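
To make the intent of the new EmitTest/LowerXALUO code concrete, here is a minimal standalone C++ sketch of the decision it encodes. The names (Node, needsExplicitTest, and the two boolean parameters) are invented for this illustration; the real code inspects SelectionDAG nodes rather than an enum, and maps an add or sub of constant 1 onto X86ISD::INC/DEC. The sketch also folds in the two caveats the patch comments call out: EmitTest stays conservative when the arithmetic feeds a store (the load-modify-store isel limitation), and INC/DEC never update CF, so the carry-based UADDO/USUBO cases keep using ADD/SUB.

    // Sketch only: condenses the checks added in EmitTest and LowerXALUO.
    #include <cstdio>

    enum class Node { Add, Sub, Inc, Dec, Load, Other };

    // True when a separate test/cmp against zero must still be emitted;
    // false when the EFLAGS already produced by the arithmetic suffice.
    bool needsExplicitTest(Node producer, bool carryFlagNeeded, bool feedsStore) {
      switch (producer) {
      case Node::Add:
      case Node::Sub:
        return feedsStore;                      // conservative for RMW stores
      case Node::Inc:
      case Node::Dec:
        return feedsStore || carryFlagNeeded;   // INC/DEC do not touch CF
      default:
        return true;                            // e.g. a plain load
      }
    }

    int main() {
      // The loop in the new test/CodeGen/X86/peep-test.ll: the induction
      // variable is bumped by an add of 1 (selected as inc) and the exit
      // branch reads ZF straight from it, roughly
      //     incq %rax            ; previously: incq %rax / testq %rax, %rax
      //     jne  .LBB0_1         ; register and label names are illustrative
      std::printf("inc feeding a branch : %s\n",
                  needsExplicitTest(Node::Inc, false, false) ? "test kept"
                                                             : "test folded");
      std::printf("value loaded from mem: %s\n",
                  needsExplicitTest(Node::Load, false, false) ? "test kept"
                                                              : "test folded");
      return 0;
    }

Compiled with any C++11 compiler, the first line prints "test folded" (the peep-test.ll case, matching the RUN lines that forbid cmp and test in the output) and the second prints "test kept".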