mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Change DAG nodes for Neon VLD2/3/4 operations to return multiple results.
Get rid of yesterday's code to fix the register usage during isel. Select the new DAG nodes to machine instructions. The new pre-alloc pass to choose adjacent registers for these results is not done, so the results of this will generally not assemble yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78136 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c2a9eeb273
commit
4a3d35abef
@ -1284,7 +1284,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
|||||||
MVT HalfVT;
|
MVT HalfVT;
|
||||||
unsigned Opc = 0;
|
unsigned Opc = 0;
|
||||||
switch (VT.getVectorElementType().getSimpleVT()) {
|
switch (VT.getVectorElementType().getSimpleVT()) {
|
||||||
default: assert(false && "unhandled VDUP splat type");
|
default: llvm_unreachable("unhandled VDUP splat type");
|
||||||
case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break;
|
case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break;
|
||||||
case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
|
case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
|
||||||
case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
|
case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
|
||||||
@ -1304,6 +1304,62 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case ARMISD::VLD2D: {
|
||||||
|
MVT VT = Op.getValueType();
|
||||||
|
SDValue MemAddr, MemUpdate, MemOpc;
|
||||||
|
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||||
|
return NULL;
|
||||||
|
unsigned Opc;
|
||||||
|
switch (VT.getSimpleVT()) {
|
||||||
|
default: llvm_unreachable("unhandled VLD2D type");
|
||||||
|
case MVT::v8i8: Opc = ARM::VLD2d8; break;
|
||||||
|
case MVT::v4i16: Opc = ARM::VLD2d16; break;
|
||||||
|
case MVT::v2f32:
|
||||||
|
case MVT::v2i32: Opc = ARM::VLD2d32; break;
|
||||||
|
case MVT::v1i64: Opc = ARM::VLD2d64; break;
|
||||||
|
}
|
||||||
|
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
|
||||||
|
return CurDAG->getTargetNode(Opc, dl, VT, VT, MVT::Other, Ops, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
case ARMISD::VLD3D: {
|
||||||
|
MVT VT = Op.getValueType();
|
||||||
|
SDValue MemAddr, MemUpdate, MemOpc;
|
||||||
|
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||||
|
return NULL;
|
||||||
|
unsigned Opc;
|
||||||
|
switch (VT.getSimpleVT()) {
|
||||||
|
default: llvm_unreachable("unhandled VLD3D type");
|
||||||
|
case MVT::v8i8: Opc = ARM::VLD3d8; break;
|
||||||
|
case MVT::v4i16: Opc = ARM::VLD3d16; break;
|
||||||
|
case MVT::v2f32:
|
||||||
|
case MVT::v2i32: Opc = ARM::VLD3d32; break;
|
||||||
|
case MVT::v1i64: Opc = ARM::VLD3d64; break;
|
||||||
|
}
|
||||||
|
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
|
||||||
|
return CurDAG->getTargetNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
case ARMISD::VLD4D: {
|
||||||
|
MVT VT = Op.getValueType();
|
||||||
|
SDValue MemAddr, MemUpdate, MemOpc;
|
||||||
|
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||||
|
return NULL;
|
||||||
|
unsigned Opc;
|
||||||
|
switch (VT.getSimpleVT()) {
|
||||||
|
default: llvm_unreachable("unhandled VLD4D type");
|
||||||
|
case MVT::v8i8: Opc = ARM::VLD4d8; break;
|
||||||
|
case MVT::v4i16: Opc = ARM::VLD4d16; break;
|
||||||
|
case MVT::v2f32:
|
||||||
|
case MVT::v2i32: Opc = ARM::VLD4d32; break;
|
||||||
|
case MVT::v1i64: Opc = ARM::VLD4d64; break;
|
||||||
|
}
|
||||||
|
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
|
||||||
|
std::vector<MVT> ResTys(4, VT);
|
||||||
|
ResTys.push_back(MVT::Other);
|
||||||
|
return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return SelectCode(Op);
|
return SelectCode(Op);
|
||||||
|
@ -1323,7 +1323,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
|
static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
|
||||||
unsigned Opcode, unsigned NumVecs) {
|
unsigned Opcode) {
|
||||||
SDNode *Node = Op.getNode();
|
SDNode *Node = Op.getNode();
|
||||||
MVT VT = Node->getValueType(0);
|
MVT VT = Node->getValueType(0);
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
@ -1332,25 +1332,8 @@ static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
|
|||||||
return SDValue(); // unimplemented
|
return SDValue(); // unimplemented
|
||||||
|
|
||||||
SDValue Ops[] = { Node->getOperand(0),
|
SDValue Ops[] = { Node->getOperand(0),
|
||||||
Node->getOperand(1) };
|
Node->getOperand(2) };
|
||||||
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
|
return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
|
||||||
SDValue Result = DAG.getNode(Opcode, dl, Tys, Ops, 2);
|
|
||||||
|
|
||||||
static const unsigned VLDRegs[] = {
|
|
||||||
ARM::D0, ARM::D1, ARM::D2, ARM::D3
|
|
||||||
};
|
|
||||||
|
|
||||||
SmallVector<SDValue, 4> ResultVals;
|
|
||||||
SDValue Chain = Result.getValue(0);
|
|
||||||
SDValue Flag = Result.getValue(1);
|
|
||||||
for (unsigned N = 0; N < NumVecs; ++N) {
|
|
||||||
Chain = DAG.getCopyFromReg(Chain, dl, VLDRegs[N], VT, Flag).getValue(1);
|
|
||||||
ResultVals.push_back(Chain.getValue(0));
|
|
||||||
Flag = Chain.getValue(2);
|
|
||||||
}
|
|
||||||
ResultVals.push_back(Chain);
|
|
||||||
return DAG.getNode(ISD::MERGE_VALUES, dl, Node->getVTList(),
|
|
||||||
ResultVals.data(), NumVecs + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
@ -1359,13 +1342,13 @@ ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
switch (IntNo) {
|
switch (IntNo) {
|
||||||
case Intrinsic::arm_neon_vld2i:
|
case Intrinsic::arm_neon_vld2i:
|
||||||
case Intrinsic::arm_neon_vld2f:
|
case Intrinsic::arm_neon_vld2f:
|
||||||
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D, 2);
|
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
|
||||||
case Intrinsic::arm_neon_vld3i:
|
case Intrinsic::arm_neon_vld3i:
|
||||||
case Intrinsic::arm_neon_vld3f:
|
case Intrinsic::arm_neon_vld3f:
|
||||||
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D, 3);
|
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
|
||||||
case Intrinsic::arm_neon_vld4i:
|
case Intrinsic::arm_neon_vld4i:
|
||||||
case Intrinsic::arm_neon_vld4f:
|
case Intrinsic::arm_neon_vld4f:
|
||||||
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D, 4);
|
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
|
||||||
case Intrinsic::arm_neon_vst2i:
|
case Intrinsic::arm_neon_vst2i:
|
||||||
case Intrinsic::arm_neon_vst2f:
|
case Intrinsic::arm_neon_vst2f:
|
||||||
case Intrinsic::arm_neon_vst3i:
|
case Intrinsic::arm_neon_vst3i:
|
||||||
|
@ -68,13 +68,18 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
|
|||||||
def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
|
def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
|
SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
|
||||||
|
|
||||||
def SDTARMVLD : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
|
def SDTARMVLD2 : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
|
||||||
def NEONvld2d : SDNode<"ARMISD::VLD2D", SDTARMVLD,
|
def SDTARMVLD3 : SDTypeProfile<3, 1, [SDTCisSameAs<0, 1>,
|
||||||
[SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
|
SDTCisSameAs<0, 2>, SDTCisPtrTy<3>]>;
|
||||||
def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD,
|
def SDTARMVLD4 : SDTypeProfile<4, 1, [SDTCisSameAs<0, 1>,
|
||||||
[SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
|
SDTCisSameAs<0, 2>,
|
||||||
def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD,
|
SDTCisSameAs<0, 3>, SDTCisPtrTy<4>]>;
|
||||||
[SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
|
def NEONvld2d : SDNode<"ARMISD::VLD2D", SDTARMVLD2,
|
||||||
|
[SDNPHasChain, SDNPMayLoad]>;
|
||||||
|
def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD3,
|
||||||
|
[SDNPHasChain, SDNPMayLoad]>;
|
||||||
|
def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD4,
|
||||||
|
[SDNPHasChain, SDNPMayLoad]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// NEON operand definitions
|
// NEON operand definitions
|
||||||
@ -183,6 +188,37 @@ def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
|
|||||||
def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
|
def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
|
||||||
def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
|
def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
|
||||||
|
|
||||||
|
// VLD2 : Vector Load (multiple 2-element structures)
|
||||||
|
class VLD2D<string OpcodeStr>
|
||||||
|
: NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr),
|
||||||
|
!strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), []>;
|
||||||
|
|
||||||
|
def VLD2d8 : VLD2D<"vld2.8">;
|
||||||
|
def VLD2d16 : VLD2D<"vld2.16">;
|
||||||
|
def VLD2d32 : VLD2D<"vld2.32">;
|
||||||
|
def VLD2d64 : VLD2D<"vld2.64">;
|
||||||
|
|
||||||
|
// VLD3 : Vector Load (multiple 3-element structures)
|
||||||
|
// Instruction class for the D-register VLD3 forms.
//   outs: three DPR results ($dst1, $dst2, $dst3)
//   ins:  one addrmode6 operand ($addr)
//   asm:  OpcodeStr, a tab, the brace-enclosed register list, then $addr
// The pattern list is empty ([]); in this commit the matching C++ case for
// ARMISD::VLD3D picks one of the defs below by vector type.
// NOTE(review): per the commit message, nothing yet forces the three results
// into adjacent registers, so the emitted text will generally not assemble.
class VLD3D<string OpcodeStr>
|
||||||
|
: NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr),
|
||||||
|
!strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), []>;
|
||||||
|
|
||||||
|
// One def per element size; they differ only in the mnemonic passed as
// OpcodeStr.
def VLD3d8 : VLD3D<"vld3.8">;
|
||||||
|
def VLD3d16 : VLD3D<"vld3.16">;
|
||||||
|
def VLD3d32 : VLD3D<"vld3.32">;
|
||||||
|
def VLD3d64 : VLD3D<"vld3.64">;
|
||||||
|
|
||||||
|
// VLD4 : Vector Load (multiple 4-element structures)
|
||||||
|
// Instruction class for the D-register VLD4 forms.
//   outs: four DPR results ($dst1, $dst2, $dst3, $dst4)
//   ins:  one addrmode6 operand ($addr)
//   asm:  OpcodeStr, a tab, the brace-enclosed register list, then $addr
// The pattern list is empty ([]); in this commit the matching C++ case for
// ARMISD::VLD4D picks one of the defs below by vector type.
// NOTE(review): per the commit message, nothing yet forces the four results
// into adjacent registers, so the emitted text will generally not assemble.
class VLD4D<string OpcodeStr>
|
||||||
|
: NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
|
||||||
|
(ins addrmode6:$addr),
|
||||||
|
!strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), []>;
|
||||||
|
|
||||||
|
// One def per element size; they differ only in the mnemonic passed as
// OpcodeStr.
def VLD4d8 : VLD4D<"vld4.8">;
|
||||||
|
def VLD4d16 : VLD4D<"vld4.16">;
|
||||||
|
def VLD4d32 : VLD4D<"vld4.32">;
|
||||||
|
def VLD4d64 : VLD4D<"vld4.64">;
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// NEON pattern fragments
|
// NEON pattern fragments
|
||||||
|
@ -93,6 +93,8 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
|
|||||||
|
|
||||||
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
|
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
|
||||||
CodeGenOpt::Level OptLevel) {
|
CodeGenOpt::Level OptLevel) {
|
||||||
|
// Call NEON pre-alloc pass here.
|
||||||
|
|
||||||
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
|
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
|
||||||
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
|
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
|
||||||
PM.add(createARMLoadStoreOptimizationPass(true));
|
PM.add(createARMLoadStoreOptimizationPass(true));
|
||||||
|
Loading…
Reference in New Issue
Block a user