Lower Neon VLD* intrinsics to custom DAG nodes, and manually allocate the
results to fixed registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78025 91177308-0d34-0410-b5e6-96231b3b80d8
commit a599bff101
parent 916afdbc2d
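For context (an editorial aside, not part of the commit): the VLD* intrinsics correspond to NEON's interleaved loads, with the vld2i/vld2f naming in this patch distinguishing integer and floating-point element types. Source like the following, compiled for ARM with NEON enabled, reaches the backend as a call to one of these intrinsics and is what the patch now custom-lowers:

    // Illustrative only: a vld2 de-interleaving load via the standard
    // NEON header.
    #include <arm_neon.h>

    uint8x8x2_t load_deinterleaved(const uint8_t *p) {
      // Loads 16 bytes from p and splits them across two D registers:
      // val[0] = elements 0,2,4,...  val[1] = elements 1,3,5,...
      return vld2_u8(p);
    }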
lib/Target/ARM/ARMISelLowering.cpp

@@ -323,6 +323,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::SETCC, MVT::i32, Expand);
   setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -463,6 +464,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
   case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ";
+  case ARMISD::VLD2D:     return "ARMISD::VLD2D";
+  case ARMISD::VLD3D:     return "ARMISD::VLD3D";
+  case ARMISD::VLD4D:     return "ARMISD::VLD4D";
   }
 }
 
@@ -1318,6 +1322,60 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
 }
 
+static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
+                                     unsigned Opcode, unsigned NumVecs) {
+  SDNode *Node = Op.getNode();
+  MVT VT = Node->getValueType(0);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (!VT.is64BitVector())
+    return SDValue(); // unimplemented: only 64-bit (D register) vectors so far
+
+  // Operand 0 is the chain and operand 1 is the intrinsic ID, so the
+  // address pointer is operand 2.
+  SDValue Ops[] = { Node->getOperand(0),
+                    Node->getOperand(2) };
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Result = DAG.getNode(Opcode, dl, Tys, Ops, 2);
+
+  // The results are manually allocated to these fixed registers.
+  static const unsigned VLDRegs[] = {
+    ARM::D0, ARM::D1, ARM::D2, ARM::D3
+  };
+
+  SmallVector<SDValue, 4> ResultVals;
+  SDValue Chain = Result.getValue(0);
+  SDValue Flag = Result.getValue(1);
+  for (unsigned N = 0; N < NumVecs; ++N) {
+    // A flagged CopyFromReg has three results: the value (0), the chain (1),
+    // and an out-flag (2) that glues the next copy to this one.
+    Chain = DAG.getCopyFromReg(Chain, dl, VLDRegs[N], VT, Flag).getValue(1);
+    ResultVals.push_back(Chain.getValue(0));
+    Flag = Chain.getValue(2);
+  }
+  ResultVals.push_back(Chain);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Node->getVTList(),
+                     ResultVals.data(), NumVecs + 1);
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  switch (IntNo) {
+  case Intrinsic::arm_neon_vld2i:
+  case Intrinsic::arm_neon_vld2f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D, 2);
+  case Intrinsic::arm_neon_vld3i:
+  case Intrinsic::arm_neon_vld3f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D, 3);
+  case Intrinsic::arm_neon_vld4i:
+  case Intrinsic::arm_neon_vld4f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D, 4);
+  case Intrinsic::arm_neon_vst2i:
+  case Intrinsic::arm_neon_vst2f:
+  case Intrinsic::arm_neon_vst3i:
+  case Intrinsic::arm_neon_vst3f:
+  case Intrinsic::arm_neon_vst4i:
+  case Intrinsic::arm_neon_vst4f:
+    // VST intrinsics are not custom lowered yet; fall through.
+  default: return SDValue();    // Don't custom lower most intrinsics.
+  }
+}
+
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
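An editorial note on the copy loop above (not part of the commit): every result of a node is reachable from any SDValue for that node via getValue(), and the loop exploits this by holding only the chain. An equivalent, more explicit spelling of the loop body, assuming the same 2009-era SelectionDAG API:

    // Editorial sketch: the same copy loop with each CopyFromReg result named.
    for (unsigned N = 0; N < NumVecs; ++N) {
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VLDRegs[N], VT, Flag);
      ResultVals.push_back(Copy);   // result 0: the vector loaded into D<N>
      Chain = Copy.getValue(1);     // result 1: the updated chain
      Flag  = Copy.getValue(2);     // result 2: glue keeping the next copy
                                    //           adjacent to this one
    }

Pinning the results to D0-D3 is the "manual allocation" from the commit message: VLD2/VLD3/VLD4 write consecutive D registers, a constraint the register allocator presumably could not yet express, so the lowering hardwires the destinations and copies the values out, with the glue chain preventing anything from clobbering the fixed registers in between.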
@@ -2354,6 +2412,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::RETURNADDR:    break;
   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
   case ISD::SHL:
lib/Target/ARM/ARMISelLowering.h

@@ -114,7 +114,12 @@ namespace llvm {
       VGETLANEs,    // sign-extend vector extract element
 
       // Vector duplicate lane (128-bit result only; 64-bit is a shuffle)
-      VDUPLANEQ     // splat a lane from a 64-bit vector to a 128-bit vector
+      VDUPLANEQ,    // splat a lane from a 64-bit vector to a 128-bit vector
+
+      // Vector load/store with (de)interleaving
+      VLD2D,
+      VLD3D,
+      VLD4D
     };
   }
 
@@ -237,6 +242,7 @@ namespace llvm {
     SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                             unsigned CallingConv, SelectionDAG &DAG);
     SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG);
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
     SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
     SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
lib/Target/ARM/ARMInstrNEON.td

@@ -68,6 +68,14 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
                            SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
 
+def SDTARMVLD  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def NEONvld2d  : SDNode<"ARMISD::VLD2D", SDTARMVLD,
+                        [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def NEONvld3d  : SDNode<"ARMISD::VLD3D", SDTARMVLD,
+                        [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def NEONvld4d  : SDNode<"ARMISD::VLD4D", SDTARMVLD,
+                        [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+
 //===----------------------------------------------------------------------===//
 // NEON operand definitions
 //===----------------------------------------------------------------------===//
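A closing note on the type profile (editorial, not from the patch): SDTypeProfile<0, 1, [SDTCisPtrTy<0>]> declares a node with zero results and a single operand that must have pointer type; the chain and output flag implied by SDNPHasChain and SDNPOutFlag are supplied through the value-type list, not the profile. That is the shape LowerNeonVLDIntrinsic builds in C++, sketched here with hypothetical in-scope Chain and Addr values:

    // The declared profile contributes no result types; chain and glue
    // come from the VT list instead.
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Ops[] = { Chain, Addr };  // incoming chain + the SDTCisPtrTy operand
    SDValue VLD2 = DAG.getNode(ARMISD::VLD2D, dl, Tys, Ops, 2);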