Fix sint_to_fp (fild*) support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25257 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-01-12 22:54:21 +00:00
parent 4aafb4ff92
commit a3195e8643
5 changed files with 61 additions and 42 deletions

View File

@ -31,16 +31,12 @@ though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
Need to add support for rotate instructions.
//===---------------------------------------------------------------------===//
Some targets (e.g. athlons) prefer freep to fstp ST(0): Some targets (e.g. athlons) prefer freep to fstp ST(0):
http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
This should use faddi on chips where it is profitable: This should use fiadd on chips where it is profitable:
double foo(double P, int *I) { return P+*I; } double foo(double P, int *I) { return P+*I; }
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
@ -107,3 +103,12 @@ Should we promote i16 to i32 to avoid partial register update stalls?
Leave any_extend as pseudo instruction and hint to register Leave any_extend as pseudo instruction and hint to register
allocator. Delay codegen until post register allocation. allocator. Delay codegen until post register allocation.
//===---------------------------------------------------------------------===//
Add a target-specific hook to the DAG combiner to handle SINT_TO_FP and
FP_TO_SINT when the source operand is already in memory.
//===---------------------------------------------------------------------===//
Check whether load folding would add a cycle in the DAG.

View File

@ -194,6 +194,11 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
// Set up the FP register classes. // Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFPRegisterClass); addRegisterClass(MVT::f64, X86::RFPRegisterClass);
if (X86DAGIsel) {
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
}
if (!UnsafeFPMath) { if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand);
@ -1404,22 +1409,30 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
} }
case ISD::SINT_TO_FP: { case ISD::SINT_TO_FP: {
assert(Op.getValueType() == MVT::f64 && assert(Op.getValueType() == MVT::f64 &&
Op.getOperand(0).getValueType() == MVT::i64 && Op.getOperand(0).getValueType() <= MVT::i64 &&
Op.getOperand(0).getValueType() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!"); "Unknown SINT_TO_FP to lower!");
// We lower sint64->FP into a store to a temporary stack slot, followed by a
// FILD64m node. SDOperand Result;
MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
unsigned Size = MVT::getSizeInBits(SrcVT)/8;
MachineFunction &MF = DAG.getMachineFunction(); MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
Op.getOperand(0), StackSlot, DAG.getSrcValue(NULL)); DAG.getEntryNode(), Op.getOperand(0),
std::vector<MVT::ValueType> RTs; StackSlot, DAG.getSrcValue(NULL));
RTs.push_back(MVT::f64);
RTs.push_back(MVT::Other); // Build the FILD
std::vector<MVT::ValueType> Tys;
Tys.push_back(MVT::f64);
Tys.push_back(MVT::Flag);
std::vector<SDOperand> Ops; std::vector<SDOperand> Ops;
Ops.push_back(Store); Ops.push_back(Chain);
Ops.push_back(StackSlot); Ops.push_back(StackSlot);
return DAG.getNode(X86ISD::FILD64m, RTs, Ops); Ops.push_back(DAG.getValueType(SrcVT));
Result = DAG.getNode(X86ISD::FILD, Tys, Ops);
return Result;
} }
case ISD::FP_TO_SINT: { case ISD::FP_TO_SINT: {
assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
@ -1749,7 +1762,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FILD64m: return "X86ISD::FILD64m"; case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";

View File

@ -40,16 +40,16 @@ namespace llvm {
SHLD, SHLD,
SHRD, SHRD,
/// FILD64m - This instruction implements SINT_TO_FP with a /// FILD - This instruction implements SINT_TO_FP with the integer source
/// 64-bit source in memory and a FP reg result. This corresponds to /// in memory and FP reg result. This corresponds to the X86::FILD*m
/// the X86::FILD64m instruction. It has two inputs (token chain and /// instructions. It has three inputs (token chain, address, and source
/// address) and two outputs (FP value and token chain). /// type) and two outputs (FP value and token chain).
FILD64m, FILD,
/// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds /// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It /// to the X86::FIST*m instructions and the rounding mode change stuff. It
/// has two inputs (token chain and address) and two outputs (FP value and /// has two inputs (token chain and address) and two outputs (int value and
/// token chain). /// token chain).
FP_TO_INT16_IN_MEM, FP_TO_INT16_IN_MEM,
FP_TO_INT32_IN_MEM, FP_TO_INT32_IN_MEM,

View File

@ -2259,7 +2259,7 @@ unsigned ISel::SelectExpr(SDOperand N) {
addFullAddress(BuildMI(BB, Opc, 4, Result), AM); addFullAddress(BuildMI(BB, Opc, 4, Result), AM);
} }
return Result; return Result;
case X86ISD::FILD64m: case X86ISD::FILD:
// Make sure we generate both values. // Make sure we generate both values.
assert(Result != 1 && N.getValueType() == MVT::f64); assert(Result != 1 && N.getValueType() == MVT::f64);
if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second) if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second)
@ -3301,7 +3301,7 @@ void ISel::Select(SDOperand N) {
SelectExpr(N); SelectExpr(N);
return; return;
case ISD::CopyFromReg: case ISD::CopyFromReg:
case X86ISD::FILD64m: case X86ISD::FILD:
ExprMap.erase(N); ExprMap.erase(N);
SelectExpr(N.getValue(0)); SelectExpr(N.getValue(0));
return; return;

View File

@ -50,7 +50,8 @@ def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
def SDTX86Fild64m : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisPtrTy<1>]>; def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
@ -99,7 +100,7 @@ def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain]>; [SDNPHasChain]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain]>; [SDNPHasChain]>;
def X86fild64m : SDNode<"X86ISD::FILD64m", SDTX86Fild64m, def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain]>; [SDNPHasChain]>;
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
@ -2707,50 +2708,50 @@ def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
// FIXME: Implement these when we have a dag-dag isel! // FIXME: Implement these when we have a dag-dag isel!
def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fadd RFP:$src1, [(set RFP:$dst, (fadd RFP:$src1,
(sint_to_fp (loadi16 addr:$src2))))]>; (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) + [mem16int] // ST(0) = ST(0) + [mem16int]
def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fadd RFP:$src1, [(set RFP:$dst, (fadd RFP:$src1,
(sint_to_fp (loadi32 addr:$src2))))]>; (X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) + [mem32int] // ST(0) = ST(0) + [mem32int]
def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fmul RFP:$src1, [(set RFP:$dst, (fmul RFP:$src1,
(sint_to_fp (loadi16 addr:$src2))))]>; (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) * [mem16int] // ST(0) = ST(0) * [mem16int]
def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fmul RFP:$src1, [(set RFP:$dst, (fmul RFP:$src1,
(sint_to_fp (loadi32 addr:$src2))))]>; (X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) * [mem32int] // ST(0) = ST(0) * [mem32int]
def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fsub RFP:$src1, [(set RFP:$dst, (fsub RFP:$src1,
(sint_to_fp (loadi16 addr:$src2))))]>; (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) - [mem16int] // ST(0) = ST(0) - [mem16int]
def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fsub RFP:$src1, [(set RFP:$dst, (fsub RFP:$src1,
(sint_to_fp (loadi32 addr:$src2))))]>; (X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) - [mem32int] // ST(0) = ST(0) - [mem32int]
def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fsub (sint_to_fp (loadi16 addr:$src2)), [(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
RFP:$src1))]>; RFP:$src1))]>;
// ST(0) = [mem16int] - ST(0) // ST(0) = [mem16int] - ST(0)
def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fsub (sint_to_fp (loadi32 addr:$src2)), [(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
RFP:$src1))]>; RFP:$src1))]>;
// ST(0) = [mem32int] - ST(0) // ST(0) = [mem32int] - ST(0)
def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fdiv RFP:$src1, [(set RFP:$dst, (fdiv RFP:$src1,
(sint_to_fp (loadi16 addr:$src2))))]>; (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) / [mem16int] // ST(0) = ST(0) / [mem16int]
def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fdiv RFP:$src1, [(set RFP:$dst, (fdiv RFP:$src1,
(sint_to_fp (loadi32 addr:$src2))))]>; (X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) / [mem32int] // ST(0) = ST(0) / [mem32int]
def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fdiv (sint_to_fp (loadi16 addr:$src2)), [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
RFP:$src1))]>; RFP:$src1))]>;
// ST(0) = [mem16int] / ST(0) // ST(0) = [mem16int] / ST(0)
def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP:$dst, (fdiv (sint_to_fp (loadi32 addr:$src2)), [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
RFP:$src1))]>; RFP:$src1))]>;
// ST(0) = [mem32int] / ST(0) // ST(0) = [mem32int] / ST(0)
@ -2863,11 +2864,11 @@ def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
[(set RFP:$dst, (loadf64 addr:$src))]>; [(set RFP:$dst, (loadf64 addr:$src))]>;
def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
[(set RFP:$dst, (sint_to_fp (loadi16 addr:$src)))]>; [(set RFP:$dst, (X86fild addr:$src, i16))]>;
def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP, def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
[(set RFP:$dst, (sint_to_fp (loadi32 addr:$src)))]>; [(set RFP:$dst, (X86fild addr:$src, i32))]>;
def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP, def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
[(set RFP:$dst, (X86fild64m addr:$src))]>; [(set RFP:$dst, (X86fild addr:$src, i64))]>;
def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
[(truncstore RFP:$src, addr:$op, f32)]>; [(truncstore RFP:$src, addr:$op, f32)]>;