mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-10 02:36:06 +00:00
Add custom UINT_TO_FP from v4i8/v4i16/v8i8/v8i16 to v4f32/v8f32
- Replace the v4i8/v8i8 -> v4f32/v8f32 UINT_TO_FP DAG combine with custom lowering to reduce DAG combine overhead.
- Extend the support to v4i16/v8i16 as well.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166487 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d9d09600ee
commit
a7554630e9
@ -948,6 +948,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
|
||||
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
|
||||
|
||||
@ -1067,6 +1070,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
|
||||
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
|
||||
|
||||
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
|
||||
@ -1257,7 +1264,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::TRUNCATE);
|
||||
setTargetDAGCombine(ISD::UINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::SETCC);
|
||||
if (Subtarget->is64Bit())
|
||||
@ -8086,11 +8092,29 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
|
||||
return Sub;
|
||||
}
|
||||
|
||||
/// Custom lowering for a vector UINT_TO_FP whose source type is one of
/// v4i8, v4i16, v8i8 or v8i16.
///
/// The source is first zero-extended to a vector of i32 with the same
/// element count, and that widened vector is then converted with
/// SINT_TO_FP.  After zero-extending 8/16-bit elements to 32 bits the sign
/// bit of every element is clear, so the signed conversion yields the same
/// value as an unsigned one would.
///
/// \param Op   the UINT_TO_FP node; its result type is reused unchanged.
/// \param DAG  the DAG in which the replacement nodes are created.
/// \returns the SINT_TO_FP node that replaces \p Op.
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
                                               SelectionDAG &DAG) const {
  DebugLoc Loc = Op.getDebugLoc();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();

  assert((SrcVT == MVT::v4i8 || SrcVT == MVT::v4i16 ||
          SrcVT == MVT::v8i8 || SrcVT == MVT::v8i16) &&
         "Custom UINT_TO_FP is not supported!");

  // Same number of lanes as the source, but each lane widened to i32.
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                SrcVT.getVectorNumElements());
  SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, Loc, WideVT, Src);

  return DAG.getNode(ISD::SINT_TO_FP, Loc, Op.getValueType(), Widened);
}
|
||||
|
||||
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
if (Op.getValueType().isVector())
|
||||
return lowerUINT_TO_FP_vec(Op, DAG);
|
||||
|
||||
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
|
||||
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
|
||||
// the optimization here.
|
||||
@ -8264,6 +8288,30 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
|
||||
}
|
||||
}
|
||||
|
||||
/// Custom lowering for ISD::ZERO_EXTEND from a 128-bit vector to a 256-bit
/// vector with the same number of elements.
///
/// With AVX2 the whole extension is emitted as a single X86ISD::VZEXT node.
/// Without AVX2 only v8i16 -> v8i32 is handled: the input is extended in two
/// v4i32 halves which are then concatenated.
///
/// \param Op   the ZERO_EXTEND node to lower.
/// \param DAG  the DAG in which the replacement nodes are created.
/// \returns the lowered value, or an empty SDValue() when the node does not
///          have a shape this routine handles.
SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue In = Op.getOperand(0);
  EVT SVT = In.getValueType();

  if (!VT.is256BitVector() || !SVT.is128BitVector() ||
      VT.getVectorNumElements() != SVT.getVectorNumElements())
    return SDValue();

  assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");

  // AVX2 has better support of integer extending.
  if (Subtarget->hasAVX2())
    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);

  // The split sequence below hard-codes v8i16 / v4i32 / v8i32.  The guard
  // above also admits other 128 -> 256-bit shapes (e.g. v4i32 -> v4i64), for
  // which those fixed types would build an ill-typed shuffle and concat, so
  // decline them here instead.
  if (VT != MVT::v8i32 || SVT != MVT::v8i16)
    return SDValue();

  // Low half: extend directly from the input (presumably VZEXT consumes the
  // low four i16 elements when producing v4i32 -- confirm against the
  // X86ISD::VZEXT definition).
  SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);

  // High half: move elements 4..7 into the low positions (the remaining
  // lanes are left undefined), then extend them the same way.
  static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
  SDValue HiSrc = DAG.getVectorShuffle(MVT::v8i16, DL, In,
                                       DAG.getUNDEF(MVT::v8i16), &Mask[0]);
  SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, HiSrc);

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
|
||||
|
||||
SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
@ -11569,6 +11617,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
|
||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||
case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
|
||||
case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
|
||||
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
|
||||
@ -16446,23 +16495,6 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// DAG combine for UINT_TO_FP nodes whose operand is v4i8 or v8i8.
///
/// Such a node is rewritten as SINT_TO_FP(ZERO_EXTEND(operand)), where the
/// operand is widened to v4i32 or v8i32 respectively.  Any other operand
/// type is left alone.
///
/// \param N    the UINT_TO_FP node being combined.
/// \param DAG  the DAG in which the replacement nodes are created.
/// \returns the replacement value, or an empty SDValue() if nothing changed.
static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src->getValueType(0);

  // Only the two byte-vector source types are handled.
  if (!(SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8))
    return SDValue();

  DebugLoc Loc = N->getDebugLoc();

  // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
  MVT WideVT = MVT::v8i32;
  if (SrcVT == MVT::v4i8)
    WideVT = MVT::v4i32;
  SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, Loc, WideVT, Src);

  // Notice that we use SINT_TO_FP because we know that the high bits
  // are zero and SINT_TO_FP is better supported by the hardware.
  return DAG.getNode(ISD::SINT_TO_FP, Loc, N->getValueType(0), Widened);
}
|
||||
|
||||
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86TargetLowering *XTLI) {
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
@ -16645,7 +16677,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
|
||||
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
|
||||
|
@ -801,7 +801,9 @@ namespace llvm {
|
||||
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
Loading…
x
Reference in New Issue
Block a user