diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 5fdcc696193..a08e8fd4583 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -31,16 +31,12 @@ though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224 //===---------------------------------------------------------------------===// -Need to add support for rotate instructions. - -//===---------------------------------------------------------------------===// - Some targets (e.g. athlons) prefer freep to fstp ST(0): http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html //===---------------------------------------------------------------------===// -This should use faddi on chips where it is profitable: +This should use fiadd on chips where it is profitable: double foo(double P, int *I) { return P+*I; } //===---------------------------------------------------------------------===// @@ -107,3 +103,12 @@ Should we promote i16 to i32 to avoid partial register update stalls? Leave any_extend as pseudo instruction and hint to register allocator. Delay codegen until post register allocation. + +//===---------------------------------------------------------------------===// + +Add a target specific hook to DAG combiner to handle SINT_TO_FP and +FP_TO_SINT when the source operand is already in memory. + +//===---------------------------------------------------------------------===// + +Check if load folding would add a cycle in the dag. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 321a9c360a5..4849c95f839 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -194,6 +194,11 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) // Set up the FP register classes. 
addRegisterClass(MVT::f64, X86::RFPRegisterClass); + if (X86DAGIsel) { + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + } + if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); @@ -1404,22 +1409,30 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { } case ISD::SINT_TO_FP: { assert(Op.getValueType() == MVT::f64 && - Op.getOperand(0).getValueType() == MVT::i64 && + Op.getOperand(0).getValueType() <= MVT::i64 && + Op.getOperand(0).getValueType() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); - // We lower sint64->FP into a store to a temporary stack slot, followed by a - // FILD64m node. + + SDOperand Result; + MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); + unsigned Size = MVT::getSizeInBits(SrcVT)/8; MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), - Op.getOperand(0), StackSlot, DAG.getSrcValue(NULL)); - std::vector<MVT::ValueType> RTs; - RTs.push_back(MVT::f64); - RTs.push_back(MVT::Other); + SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, + DAG.getEntryNode(), Op.getOperand(0), + StackSlot, DAG.getSrcValue(NULL)); + + // Build the FILD + std::vector<MVT::ValueType> Tys; + Tys.push_back(MVT::f64); + Tys.push_back(MVT::Flag); std::vector<SDOperand> Ops; - Ops.push_back(Store); + Ops.push_back(Chain); Ops.push_back(StackSlot); - return DAG.getNode(X86ISD::FILD64m, RTs, Ops); + Ops.push_back(DAG.getValueType(SrcVT)); + Result = DAG.getNode(X86ISD::FILD, Tys, Ops); + return Result; } case ISD::FP_TO_SINT: { assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && @@ -1749,7 +1762,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 
case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHRD: return "X86ISD::SHRD"; - case X86ISD::FILD64m: return "X86ISD::FILD64m"; + case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a4d481aca31..1deb5f1304d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -40,16 +40,16 @@ namespace llvm { SHLD, SHRD, - /// FILD64m - This instruction implements SINT_TO_FP with a - /// 64-bit source in memory and a FP reg result. This corresponds to - /// the X86::FILD64m instruction. It has two inputs (token chain and - /// address) and two outputs (FP value and token chain). - FILD64m, + /// FILD - This instruction implements SINT_TO_FP with the integer source + /// in memory and FP reg result. This corresponds to the X86::FILD*m + /// instructions. It has three inputs (token chain, address, and source + /// type) and two outputs (FP value and token chain). + FILD, /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the /// integer destination in memory and a FP reg source. This corresponds /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (FP value and + /// has two inputs (token chain and address) and two outputs (int value and /// token chain). 
FP_TO_INT16_IN_MEM, FP_TO_INT32_IN_MEM, diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp index 6ca4078047d..cb1835489c1 100644 --- a/lib/Target/X86/X86ISelPattern.cpp +++ b/lib/Target/X86/X86ISelPattern.cpp @@ -2259,7 +2259,7 @@ unsigned ISel::SelectExpr(SDOperand N) { addFullAddress(BuildMI(BB, Opc, 4, Result), AM); } return Result; - case X86ISD::FILD64m: + case X86ISD::FILD: // Make sure we generate both values. assert(Result != 1 && N.getValueType() == MVT::f64); if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second) @@ -3301,7 +3301,7 @@ void ISel::Select(SDOperand N) { SelectExpr(N); return; case ISD::CopyFromReg: - case X86ISD::FILD64m: + case X86ISD::FILD: ExprMap.erase(N); SelectExpr(N.getValue(0)); return; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 7431a3c1748..12d1333c2ed 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -50,7 +50,8 @@ def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; -def SDTX86Fild64m : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisPtrTy<1>]>; +def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; @@ -99,7 +100,7 @@ def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, [SDNPHasChain]>; -def X86fild64m : SDNode<"X86ISD::FILD64m", SDTX86Fild64m, +def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, @@ -2707,50 +2708,50 @@ def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">; // FIXME: Implement these when we have a dag-dag isel! 
def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fadd RFP:$src1, - (sint_to_fp (loadi16 addr:$src2))))]>; + (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) + [mem16int] def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fadd RFP:$src1, - (sint_to_fp (loadi32 addr:$src2))))]>; + (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) + [mem32int] def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fmul RFP:$src1, - (sint_to_fp (loadi16 addr:$src2))))]>; + (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) * [mem16int] def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fmul RFP:$src1, - (sint_to_fp (loadi32 addr:$src2))))]>; + (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) * [mem32int] def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fsub RFP:$src1, - (sint_to_fp (loadi16 addr:$src2))))]>; + (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) - [mem16int] def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fsub RFP:$src1, - (sint_to_fp (loadi32 addr:$src2))))]>; + (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) - [mem32int] def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (sint_to_fp (loadi16 addr:$src2)), + [(set RFP:$dst, (fsub (X86fild addr:$src2, i16), RFP:$src1))]>; // ST(0) = [mem16int] - ST(0) def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (sint_to_fp (loadi32 addr:$src2)), + [(set RFP:$dst, (fsub (X86fild addr:$src2, i32), RFP:$src1))]>; // ST(0) = [mem32int] - ST(0) def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, [(set RFP:$dst, (fdiv RFP:$src1, - (sint_to_fp (loadi16 addr:$src2))))]>; + (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) / [mem16int] def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), 
OneArgFPRW, [(set RFP:$dst, (fdiv RFP:$src1, - (sint_to_fp (loadi32 addr:$src2))))]>; + (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) / [mem32int] def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (sint_to_fp (loadi16 addr:$src2)), + [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16), RFP:$src1))]>; // ST(0) = [mem16int] / ST(0) def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (sint_to_fp (loadi32 addr:$src2)), + [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32), RFP:$src1))]>; // ST(0) = [mem32int] / ST(0) @@ -2863,11 +2864,11 @@ def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP, def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, [(set RFP:$dst, (loadf64 addr:$src))]>; def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, - [(set RFP:$dst, (sint_to_fp (loadi16 addr:$src)))]>; + [(set RFP:$dst, (X86fild addr:$src, i16))]>; def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP, - [(set RFP:$dst, (sint_to_fp (loadi32 addr:$src)))]>; + [(set RFP:$dst, (X86fild addr:$src, i32))]>; def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild64m addr:$src))]>; + [(set RFP:$dst, (X86fild addr:$src, i64))]>; def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, [(truncstore RFP:$src, addr:$op, f32)]>;