Change VLD1 instructions for loading Q register values to operate on pairs

of D registers.  Add a separate VLD1q instruction with a Q register
destination operand for use by loadRegFromStackSlot.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99261 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2010-03-23 05:25:43 +00:00
parent d8036fb0de
commit 621f195243
4 changed files with 62 additions and 43 deletions

View File

@ -788,7 +788,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
if (Align >= 16
&& (getRegisterInfo().canRealignStack(MF))) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
} else {

View File

@ -124,10 +124,10 @@ private:
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
SDNode *SelectDYN_ALLOC(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs == 2, QOpcodes1 is not used.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
@ -1022,7 +1022,7 @@ static EVT GetNEONSubregVT(EVT VT) {
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@ -1047,6 +1047,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VLD1/VST1");
break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@ -1060,15 +1063,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
}
EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs == 2) {
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(4, VT);
std::vector<EVT> ResTys(2 * NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
@ -1831,9 +1834,17 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64 };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD2d64 };
ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}

View File

@ -116,6 +116,7 @@ def h64imm : Operand<i64> {
//===----------------------------------------------------------------------===//
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
"vldmia", "$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
@ -126,6 +127,19 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
let Inst{11-8} = 0b1011;
}
let mayLoad = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDRQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
def VLD1q
: NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
def VLD1q_UPD
: NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
"${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
} // mayLoad = 1
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
"vstmia", "$addr, ${src:dregpair}",
@ -137,29 +151,27 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
let Inst{11-8} = 0b1011;
}
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
"vld1", Dt, "\\{$dst\\}, $addr", "",
[(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
"vld1", Dt, "${dst:dregpair}, $addr", "",
[(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", Dt, "\\{$dst\\}, $addr", "", []>;
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD1d8 : VLD1D<0b0000, "8", v8i8>;
def VLD1d16 : VLD1D<0b0100, "16", v4i16>;
def VLD1d32 : VLD1D<0b1000, "32", v2i32>;
def VLD1df : VLD1D<0b1000, "32", v2f32>;
def VLD1d64 : VLD1D<0b1100, "64", v1i64>;
def VLD1d8 : VLD1D<0b0000, "8">;
def VLD1d16 : VLD1D<0b0100, "16">;
def VLD1d32 : VLD1D<0b1000, "32">;
def VLD1d64 : VLD1D<0b1100, "64">;
def VLD1q8 : VLD1Q<0b0000, "8", v16i8>;
def VLD1q16 : VLD1Q<0b0100, "16", v8i16>;
def VLD1q32 : VLD1Q<0b1000, "32", v4i32>;
def VLD1qf : VLD1Q<0b1000, "32", v4f32>;
def VLD1q64 : VLD1Q<0b1100, "64", v2i64>;
let mayLoad = 1 in {
def VLD1q8 : VLD1Q<0b0000, "8">;
def VLD1q16 : VLD1Q<0b0100, "16">;
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
@ -182,9 +194,6 @@ def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
} // mayLoad = 1
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
@ -242,9 +251,6 @@ class VLD2Q<bits<4> op7_4, string Dt>
def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
@ -266,11 +272,6 @@ class VLD2QWB<bits<4> op7_4, string Dt>
def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
(outs DPR:$dst1, DPR:$dst2, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
"vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
"$addr.addr = $wb", []>;
def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;

View File

@ -46,10 +46,17 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
FirstOpnd = 0;
NumRegs = 2;
return true;
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2d64:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32: