ARM: Refactor more NEON VLD/VST instructions to use composite physregs

Convert the register-pair VLD1/VLD2 all-lanes instructions to use the
composite DPair register class, and kill off more of the pseudos as a result.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152150 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach
2012-03-06 22:01:44 +00:00
parent 40530ad3a8
commit c0fc450f07
9 changed files with 118 additions and 169 deletions

View File

@ -2760,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD:
case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32Pseudo: case ARM::VLD1DUPq32:
case ARM::VLD1DUPq8PseudoWB_fixed: case ARM::VLD1DUPq8wb_fixed:
case ARM::VLD1DUPq16PseudoWB_fixed: case ARM::VLD1DUPq16wb_fixed:
case ARM::VLD1DUPq32PseudoWB_fixed: case ARM::VLD1DUPq32wb_fixed:
case ARM::VLD1DUPq8PseudoWB_register: case ARM::VLD1DUPq8wb_register:
case ARM::VLD1DUPq16PseudoWB_register: case ARM::VLD1DUPq16wb_register:
case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32Pseudo: case ARM::VLD2DUPd32:
case ARM::VLD2DUPd8PseudoWB_fixed: case ARM::VLD2DUPd8wb_fixed:
case ARM::VLD2DUPd16PseudoWB_fixed: case ARM::VLD2DUPd16wb_fixed:
case ARM::VLD2DUPd32PseudoWB_fixed: case ARM::VLD2DUPd32wb_fixed:
case ARM::VLD2DUPd8PseudoWB_register: case ARM::VLD2DUPd8wb_register:
case ARM::VLD2DUPd16PseudoWB_register: case ARM::VLD2DUPd16wb_register:
case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo: case ARM::VLD4DUPd32Pseudo:

View File

@ -129,16 +129,6 @@ namespace {
} }
static const NEONLdStTableEntry NEONLdStTable[] = { static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false},
{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false},
{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false},
{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false},
{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false},
{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false},
{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD:
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
case ARM::VLD1DUPq8PseudoWB_fixed:
case ARM::VLD1DUPq16PseudoWB_fixed:
case ARM::VLD1DUPq32PseudoWB_fixed:
case ARM::VLD1DUPq8PseudoWB_register:
case ARM::VLD1DUPq16PseudoWB_register:
case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
case ARM::VLD2DUPd8PseudoWB_fixed:
case ARM::VLD2DUPd16PseudoWB_fixed:
case ARM::VLD2DUPd32PseudoWB_fixed:
case ARM::VLD2DUPd8PseudoWB_register:
case ARM::VLD2DUPd16PseudoWB_register:
case ARM::VLD2DUPd32PseudoWB_register:
case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo: case ARM::VLD3DUPd32Pseudo:

View File

@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
} }
return Opc; // If not one we handle, return it unchanged. return Opc; // If not one we handle, return it unchanged.
} }
@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
} }
case ARMISD::VLD2DUP: { case ARMISD::VLD2DUP: {
unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32Pseudo }; ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes); return SelectVLDDup(N, false, 2, Opcodes);
} }
@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
} }
case ARMISD::VLD2DUP_UPD: { case ARMISD::VLD2DUP_UPD: {
unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed };
ARM::VLD2DUPd32PseudoWB_fixed };
return SelectVLDDup(N, true, 2, Opcodes); return SelectVLDDup(N, true, 2, Opcodes);
} }

View File

@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList"; let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands"; let RenderMethod = "addVecListOperands";
} }
def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> { def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
let ParserMatchClass = VecListDPairAsmOperand; let ParserMatchClass = VecListDPairAsmOperand;
} }
// Register list of three sequential D registers. // Register list of three sequential D registers.
@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList"; let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands"; let RenderMethod = "addVecListOperands";
} }
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> { def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListDPairSpacedAsmOperand; let ParserMatchClass = VecListDPairSpacedAsmOperand;
} }
// Register list of three D registers spaced by 2 (three Q registers). // Register list of three D registers spaced by 2 (three Q registers).
@ -153,13 +153,14 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
let ParserMatchClass = VecListOneDAllLanesAsmOperand; let ParserMatchClass = VecListOneDAllLanesAsmOperand;
} }
// Register list of two D registers, with "all lanes" subscripting. // Register list of two D registers, with "all lanes" subscripting.
def VecListTwoDAllLanesAsmOperand : AsmOperandClass { def VecListDPairAllLanesAsmOperand : AsmOperandClass {
let Name = "VecListTwoDAllLanes"; let Name = "VecListDPairAllLanes";
let ParserMethod = "parseVectorList"; let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands"; let RenderMethod = "addVecListOperands";
} }
def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { def VecListDPairAllLanes : RegisterOperand<DPair,
let ParserMatchClass = VecListTwoDAllLanesAsmOperand; "printVectorListTwoAllLanes"> {
let ParserMatchClass = VecListDPairAllLanesAsmOperand;
} }
// Register list of two D registers spaced by 2 (two sequential Q registers). // Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListTwoQAllLanesAsmOperand : AsmOperandClass { def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
let Inst{4} = Rn{4}; let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction"; let DecoderMethod = "DecodeVLD1DupInstruction";
} }
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
let Pattern = [(set QPR:$dst,
(Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
}
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>; (VLD1DUPd32 addrmode6:$addr)>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32Pseudo addrmode6:$addr)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
class VLD1QDUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup, (ins addrmode6dup:$Rn), IIC_VLD1dup,
"vld1", Dt, "$Vd, $Rn", "", []> { "vld1", Dt, "$Vd, $Rn", "",
[(set VecListDPairAllLanes:$Vd,
(Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111; let Rm = 0b1111;
let Inst{4} = Rn{4}; let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction"; let DecoderMethod = "DecodeVLD1DupInstruction";
} }
def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback: // ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
} }
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListTwoDAllLanes:$Vd, GPR:$wb), (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD1dupu, (ins addrmode6dup:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!", "vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> { "$Rn.addr = $wb", []> {
@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
let AsmMatchConverter = "cvtVLDwbFixed"; let AsmMatchConverter = "cvtVLDwbFixed";
} }
def _register : NLdSt<1, 0b10, 0b1100, op7_4, def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListTwoDAllLanes:$Vd, GPR:$wb), (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm", "vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> { "$Rn.addr = $wb", []> {
@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
@ -1378,13 +1365,9 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
let DecoderMethod = "DecodeVLD2DupInstruction"; let DecoderMethod = "DecodeVLD2DupInstruction";
} }
def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
// ...with double-spaced registers (not used for codegen): // ...with double-spaced registers (not used for codegen):
def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>;
@ -1414,21 +1397,14 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
} }
} }
defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt> class VLD3DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),

View File

@ -1133,9 +1133,10 @@ public:
return VectorList.Count == 1; return VectorList.Count == 1;
} }
bool isVecListTwoDAllLanes() const { bool isVecListDPairAllLanes() const {
if (!isSingleSpacedVectorAllLanes()) return false; if (!isSingleSpacedVectorAllLanes()) return false;
return VectorList.Count == 2; return (ARMMCRegisterClasses[ARM::DPairRegClassID]
.contains(VectorList.RegNum));
} }
bool isVecListTwoQAllLanes() const { bool isVecListTwoQAllLanes() const {
@ -2981,12 +2982,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
case NoLanes: case NoLanes:
E = Parser.getTok().getLoc(); E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]); &ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break; break;
case AllLanes: case AllLanes:
E = Parser.getTok().getLoc(); E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
S, E)); S, E));
break; break;
@ -3152,7 +3154,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (LaneKind) { switch (LaneKind) {
case NoLanes: case NoLanes:
// Non-lane two-register operands have been converted to the // Two-register operands have been converted to the
// composite register classes. // composite register classes.
if (Count == 2) { if (Count == 2) {
const MCRegisterClass *RC = (Spacing == 1) ? const MCRegisterClass *RC = (Spacing == 1) ?
@ -3165,6 +3167,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
(Spacing == 2), S, E)); (Spacing == 2), S, E));
break; break;
case AllLanes: case AllLanes:
// Two-register operands have been converted to the
// composite register classes.
if (Count == 2 && Spacing == 1) {
const MCRegisterClass *RC = &ARMMCRegisterClasses[ARM::DPairRegClassID];
FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
}
Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
(Spacing == 2), (Spacing == 2),
S, E)); S, E));

View File

@ -2001,27 +2001,21 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// First output register // First output register
switch (Inst.getOpcode()) { switch (Inst.getOpcode()) {
case ARM::VLD1q16: case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
case ARM::VLD1q32: case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
case ARM::VLD1q64: case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
case ARM::VLD1q8: case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
case ARM::VLD1q16wb_fixed: case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
case ARM::VLD1q16wb_register: case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
case ARM::VLD1q32wb_fixed: case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
case ARM::VLD1q32wb_register: case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
case ARM::VLD1q64wb_fixed: case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
case ARM::VLD1q64wb_register:
case ARM::VLD1q8wb_fixed: // FIXME: These go in the VLDnDup* functions, not here.
case ARM::VLD1q8wb_register: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
case ARM::VLD2d16: case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
case ARM::VLD2d32: case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
case ARM::VLD2d8: case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2d32wb_register:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail; return MCDisassembler::Fail;
break; break;
@ -2525,8 +2519,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
align *= (1 << size); align *= (1 << size);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) switch (Inst.getOpcode()) {
return MCDisassembler::Fail; case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
}
if (Rm != 0xF) { if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail; return MCDisassembler::Fail;
@ -2559,8 +2564,19 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned pred = fieldFromInstruction32(Insn, 22, 4); unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size; align *= 2*size;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) switch (Inst.getOpcode()) {
return MCDisassembler::Fail; case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
}
if (Rm != 0xF) if (Rm != 0xF)
Inst.addOperand(MCOperand::CreateImm(0)); Inst.addOperand(MCOperand::CreateImm(0));

View File

@ -1025,7 +1025,7 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}"; O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
} }
void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
raw_ostream &O) { raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@ -1033,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
} }
void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI, void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
unsigned OpNum, unsigned OpNum,
raw_ostream &O) { raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@ -1072,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum, unsigned OpNum,
raw_ostream &O) { raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with unsigned Reg = MI->getOperand(OpNum).getReg();
// addition to get the next register, but for VFP registers, the unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
// sort order is guaranteed because they're all of the form D<n>. unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
<< getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
} }
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@ -1102,15 +1101,6 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
} }
void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
<< getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
}
void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
unsigned OpNum, unsigned OpNum,
raw_ostream &O) { raw_ostream &O) {

View File

@ -133,9 +133,9 @@ public:
void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum, void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
raw_ostream &O); raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@ -146,8 +146,6 @@ public:
raw_ostream &O); raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O); raw_ostream &O);
void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O); raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,

View File

@ -579,7 +579,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
REG("VecListThreeD"); REG("VecListThreeD");
REG("VecListFourD"); REG("VecListFourD");
REG("VecListOneDAllLanes"); REG("VecListOneDAllLanes");
REG("VecListTwoDAllLanes"); REG("VecListDPairAllLanes");
REG("VecListTwoQAllLanes"); REG("VecListTwoQAllLanes");
IMM("i32imm"); IMM("i32imm");