mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
Add alternative support for FP_EXTEND from v2f32 to v2f64
- Due to the current matching-vector-elements constraint in ISD::FP_EXTEND, extending from v2f32 to v2f64 is scalarized. Add a customized v2f32 widening that converts it into a target-specific X86ISD::VFPEXT to work around this constraint. This patch also reverts a previous attempt to fix this issue, which worked by recovering the scalarized ISD::FP_EXTEND pattern, and thus significantly reduces the overhead of supporting non-power-2 vector FP extend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165625 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
98f01bf34b
commit
9d796db3e7
@ -634,7 +634,7 @@ private:
|
|||||||
SDValue WidenVecRes_InregOp(SDNode *N);
|
SDValue WidenVecRes_InregOp(SDNode *N);
|
||||||
|
|
||||||
// Widen Vector Operand.
|
// Widen Vector Operand.
|
||||||
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
|
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
|
||||||
SDValue WidenVecOp_BITCAST(SDNode *N);
|
SDValue WidenVecOp_BITCAST(SDNode *N);
|
||||||
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
|
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
|
||||||
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
||||||
|
@ -2082,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Widen Vector Operand
|
// Widen Vector Operand
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
|
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||||
DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
|
DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
|
||||||
N->dump(&DAG);
|
N->dump(&DAG);
|
||||||
dbgs() << "\n");
|
dbgs() << "\n");
|
||||||
SDValue Res = SDValue();
|
SDValue Res = SDValue();
|
||||||
|
|
||||||
|
// See if the target wants to custom widen this node.
|
||||||
|
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
|
||||||
|
return false;
|
||||||
|
|
||||||
switch (N->getOpcode()) {
|
switch (N->getOpcode()) {
|
||||||
default:
|
default:
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
|
dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
|
||||||
N->dump(&DAG);
|
N->dump(&DAG);
|
||||||
dbgs() << "\n";
|
dbgs() << "\n";
|
||||||
#endif
|
#endif
|
||||||
|
@ -939,6 +939,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
|
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
|
||||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
|
||||||
|
|
||||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
|
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5161,86 +5163,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
|
|
||||||
// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
|
|
||||||
// constraint of matching input/output vector elements.
|
|
||||||
SDValue
|
|
||||||
X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
SDNode *N = Op.getNode();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
unsigned NumElts = Op.getNumOperands();
|
|
||||||
|
|
||||||
// Check supported types and sub-targets.
|
|
||||||
//
|
|
||||||
// Only v2f32 -> v2f64 needs special handling.
|
|
||||||
if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
SDValue VecIn;
|
|
||||||
EVT VecInVT;
|
|
||||||
SmallVector<int, 8> Mask;
|
|
||||||
EVT SrcVT = MVT::Other;
|
|
||||||
|
|
||||||
// Check the patterns could be translated into X86vfpext.
|
|
||||||
for (unsigned i = 0; i < NumElts; ++i) {
|
|
||||||
SDValue In = N->getOperand(i);
|
|
||||||
unsigned Opcode = In.getOpcode();
|
|
||||||
|
|
||||||
// Skip if the element is undefined.
|
|
||||||
if (Opcode == ISD::UNDEF) {
|
|
||||||
Mask.push_back(-1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quit if one of the elements is not defined from 'fpext'.
|
|
||||||
if (Opcode != ISD::FP_EXTEND)
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
// Check how the source of 'fpext' is defined.
|
|
||||||
SDValue L2In = In.getOperand(0);
|
|
||||||
EVT L2InVT = L2In.getValueType();
|
|
||||||
|
|
||||||
// Check the original type
|
|
||||||
if (SrcVT == MVT::Other)
|
|
||||||
SrcVT = L2InVT;
|
|
||||||
else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
// Check whether the value being 'fpext'ed is extracted from the same
|
|
||||||
// source.
|
|
||||||
Opcode = L2In.getOpcode();
|
|
||||||
|
|
||||||
// Quit if it's not extracted with a constant index.
|
|
||||||
if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
|
|
||||||
!isa<ConstantSDNode>(L2In.getOperand(1)))
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
SDValue ExtractedFromVec = L2In.getOperand(0);
|
|
||||||
|
|
||||||
if (VecIn.getNode() == 0) {
|
|
||||||
VecIn = ExtractedFromVec;
|
|
||||||
VecInVT = ExtractedFromVec.getValueType();
|
|
||||||
} else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quit if all operands of BUILD_VECTOR are undefined.
|
|
||||||
if (!VecIn.getNode())
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
// Fill the remaining mask as undef.
|
|
||||||
for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
|
|
||||||
Mask.push_back(-1);
|
|
||||||
|
|
||||||
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
|
|
||||||
DAG.getVectorShuffle(VecInVT, DL,
|
|
||||||
VecIn, DAG.getUNDEF(VecInVT),
|
|
||||||
&Mask[0]));
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
@ -5273,10 +5195,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
if (Broadcast.getNode())
|
if (Broadcast.getNode())
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
SDValue FpExt = LowerVectorFpExtend(Op, DAG);
|
|
||||||
if (FpExt.getNode())
|
|
||||||
return FpExt;
|
|
||||||
|
|
||||||
unsigned EVTBits = ExtVT.getSizeInBits();
|
unsigned EVTBits = ExtVT.getSizeInBits();
|
||||||
|
|
||||||
unsigned NumZero = 0;
|
unsigned NumZero = 0;
|
||||||
@ -8215,6 +8133,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
|
|||||||
return FIST;
|
return FIST;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// lowerFP_EXTEND - Custom lowering for an ISD::FP_EXTEND node whose source
// operand is a v2f32 vector (the only source type accepted; see the assert).
//
// The v2f32 input is concatenated with an undef v2f32 to form a v4f32, and
// that widened vector is fed to a target-specific X86ISD::VFPEXT node that
// produces the node's original result type.
//
// Op  - the FP_EXTEND node to lower; operand 0 is the value being extended.
// DAG - the SelectionDAG used to create the replacement nodes.
//
// Returns the new X86ISD::VFPEXT node.
// NOTE(review): the result type is presumably always v2f64 here, but that is
// not checked in this function - confirm against the callers.
SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
SDValue In = Op.getOperand(0);
|
||||||
|
EVT SVT = In.getValueType();
|
||||||
|
|
||||||
|
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
|
||||||
|
|
||||||
|
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
|
||||||
|
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
|
||||||
|
In, DAG.getUNDEF(SVT)));
|
||||||
|
}
|
||||||
|
|
||||||
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
|
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
|
||||||
LLVMContext *Context = DAG.getContext();
|
LLVMContext *Context = DAG.getContext();
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
@ -11407,6 +11339,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||||
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
|
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
|
||||||
|
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
|
||||||
case ISD::FABS: return LowerFABS(Op, DAG);
|
case ISD::FABS: return LowerFABS(Op, DAG);
|
||||||
case ISD::FNEG: return LowerFNEG(Op, DAG);
|
case ISD::FNEG: return LowerFNEG(Op, DAG);
|
||||||
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
|
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
|
||||||
|
@ -788,6 +788,7 @@ namespace llvm {
|
|||||||
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
||||||
@ -818,8 +819,6 @@ namespace llvm {
|
|||||||
|
|
||||||
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
virtual SDValue
|
virtual SDValue
|
||||||
LowerFormalArguments(SDValue Chain,
|
LowerFormalArguments(SDValue Chain,
|
||||||
CallingConv::ID CallConv, bool isVarArg,
|
CallingConv::ID CallConv, bool isVarArg,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user