// Patch summary: on X86, ISD::SDIV/UDIV/SREM/UREM for i8, i16, i32 and i64 are marked Custom and
// lowered by X86TargetLowering::LowerIntegerDivOrRem to a two-result X86ISD::IDIV (signed) or
// X86ISD::DIV (unsigned) node. Value 0 is the division (low) result and value 1 is the remainder
// (high) result, so x/y and x%y on the same operands become one node and need a single div/idiv.
// X86DAGToDAGISel::Select now matches these nodes and copies out only the results that have uses.
// The new test divrem.ll counts 8 lines matching "div" in the x86-64 llc output for 8 functions.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b33ced8ba66..117d2734c54 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1162,12 +1162,9 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { return NULL; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; - bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; + case X86ISD::DIV: + case X86ISD::IDIV: { + bool isSigned = Opcode == X86ISD::IDIV; if (!isSigned) switch (NVT) { default: assert(0 && "Unsupported VT!"); @@ -1275,31 +1272,49 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); } - unsigned Reg = isDiv ? LoReg : HiReg; - SDOperand Result; - if (Reg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); - Chain = Result.getValue(1); - Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1 - Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, - MVT::i8, Result, SRIdx), 0); - } else { - Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag); + // Copy the division (low) result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDOperand Result = CurDAG->getCopyFromReg(Chain, LoReg, NVT, InFlag); Chain = Result.getValue(1); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif + } + // Copy the remainder (high) result, if it is needed. 
+ if (!N.getValue(1).use_empty()) { + SDOperand Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); + Chain = Result.getValue(1); + InFlag = Result.getValue(2); + Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1 + Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, + MVT::i8, Result, SRIdx), 0); + } else { + Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); + Chain = Result.getValue(1); + InFlag = Result.getValue(2); + } + ReplaceUses(N.getValue(1), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif } - ReplaceUses(N.getValue(0), Result); if (foldedLoad) ReplaceUses(N1.getValue(1), Chain); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.Val->dump(CurDAG)); - DOUT << "\n"; Indent -= 2; #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a67e77f2ce6..1a8089688bf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -155,6 +155,27 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); } + // Divide and remainder are lowered to use div or idiv in legalize in + // order to expose the intermediate computations to trivial CSE. This is + // most noticeable when both x/y and x%y are being computed; they can be + // done with a single div or idiv. 
+ setOperationAction(ISD::SDIV , MVT::i8 , Custom); + setOperationAction(ISD::UDIV , MVT::i8 , Custom); + setOperationAction(ISD::SREM , MVT::i8 , Custom); + setOperationAction(ISD::UREM , MVT::i8 , Custom); + setOperationAction(ISD::SDIV , MVT::i16 , Custom); + setOperationAction(ISD::UDIV , MVT::i16 , Custom); + setOperationAction(ISD::SREM , MVT::i16 , Custom); + setOperationAction(ISD::UREM , MVT::i16 , Custom); + setOperationAction(ISD::SDIV , MVT::i32 , Custom); + setOperationAction(ISD::UDIV , MVT::i32 , Custom); + setOperationAction(ISD::SREM , MVT::i32 , Custom); + setOperationAction(ISD::UREM , MVT::i32 , Custom); + setOperationAction(ISD::SDIV , MVT::i64 , Custom); + setOperationAction(ISD::UDIV , MVT::i64 , Custom); + setOperationAction(ISD::SREM , MVT::i64 , Custom); + setOperationAction(ISD::UREM , MVT::i64 , Custom); + setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::Other, Expand); @@ -3393,6 +3414,22 @@ SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); } +SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) { + unsigned Opcode = Op.getOpcode(); + MVT::ValueType NVT = Op.getValueType(); + bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; + bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; + unsigned Opc = isSigned ? 
X86ISD::IDIV : X86ISD::DIV; + + SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2); + + if (isDiv) + return DR; + + return SDOperand(DR.Val, 1); +} + SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { assert(Op.getOperand(0).getValueType() <= MVT::i64 && Op.getOperand(0).getValueType() >= MVT::i16 && @@ -4668,6 +4705,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SHL_PARTS: case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerShift(Op, DAG); + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: return LowerIntegerDivOrRem(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); @@ -4751,6 +4792,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; + case X86ISD::DIV: return "X86ISD::DIV"; + case X86ISD::IDIV: return "X86ISD::IDIV"; } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 04279e835cb..10172d95a93 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -181,6 +181,10 @@ namespace llvm { /// in order to obtain suitable precision. FRSQRT, FRCP, + /// DIV, IDIV - Unsigned and signed integer division and remainder. 
+ /// Each node produces two results: value 0 is the quotient, value 1 is the remainder. + DIV, IDIV, + // Thread Local Storage TLSADDR, THREAD_POINTER, @@ -420,6 +424,7 @@ namespace llvm { SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG); SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG); SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG); SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG); diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll new file mode 100644 index 00000000000..a611eddc768 --- /dev/null +++ b/test/CodeGen/X86/divrem.ll @@ -0,0 +1,58 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8 + +define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) { + %r = sdiv i64 %x, %y + %t = srem i64 %x, %y + store i64 %r, i64* %p + store i64 %t, i64* %q + ret void +} +define void @si32(i32 %x, i32 %y, i32* %p, i32* %q) { + %r = sdiv i32 %x, %y + %t = srem i32 %x, %y + store i32 %r, i32* %p + store i32 %t, i32* %q + ret void +} +define void @si16(i16 %x, i16 %y, i16* %p, i16* %q) { + %r = sdiv i16 %x, %y + %t = srem i16 %x, %y + store i16 %r, i16* %p + store i16 %t, i16* %q + ret void +} +define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) { + %r = sdiv i8 %x, %y + %t = srem i8 %x, %y + store i8 %r, i8* %p + store i8 %t, i8* %q + ret void +} +define void @ui64(i64 %x, i64 %y, i64* %p, i64* %q) { + %r = udiv i64 %x, %y + %t = urem i64 %x, %y + store i64 %r, i64* %p + store i64 %t, i64* %q + ret void +} +define void @ui32(i32 %x, i32 %y, i32* %p, i32* %q) { + %r = udiv i32 %x, %y + %t = urem i32 %x, %y + store i32 %r, i32* %p + store i32 %t, i32* %q + ret void +} +define void @ui16(i16 %x, i16 %y, i16* %p, i16* %q) { + %r = udiv i16 %x, %y + %t = urem i16 %x, %y + store i16 %r, i16* %p + store i16 %t, i16* %q + ret void +} +define void @ui8(i8 %x, i8 %y, i8* %p, i8* %q) { + %r = udiv i8 %x, %y + %t = urem i8 
%x, %y + store i8 %r, i8* %p + store i8 %t, i8* %q + ret void +}