diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 4fe943a7c73..abdd938cc18 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,11 +520,23 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional alignment + // addrmode6 := reg with optional writeback and alignment // - // This is stored in two operands [regaddr, align]. The first is the - // address register. The second operand is the value of the alignment - // specifier to use or zero if no explicit alignment. + // This is stored in four operands [regaddr, regupdate, opc, align]. The + // first is the address register. The second register holds the value of + // a post-access increment for writeback or reg0 if no writeback or if the + // writeback increment is the size of the memory access. The third + // operand encodes whether there is writeback to the address register. The + // fourth operand is the value of the alignment specifier to use or zero if + // no explicit alignment. + + static inline unsigned getAM6Opc(bool WB = false) { + return (int)WB; + } + + static inline bool getAM6WBFlag(unsigned Mode) { + return Mode & 1; + } } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2aedb880a14..8e537d8b628 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -727,9 +727,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 + && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -779,7 +780,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3cf82433ed6..013e00ade7f 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,7 +80,8 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, + SDValue &Opc, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -501,8 +502,12 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Align) { + SDValue &Addr, SDValue &Update, + SDValue &Opc, SDValue &Align) { Addr = N; + // Default to no writeback. + Update = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1025,8 +1030,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1053,10 +1058,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDValue PredReg = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, Align, Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1064,10 +1070,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, Align, Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1079,21 +1086,25 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, Align, Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1112,8 +1123,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1139,8 +1150,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); + Ops.push_back(MemUpdate); + Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1150,7 +1163,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1167,12 +1180,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, @@ -1182,20 +1198,20 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+2] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+2] = Pred; - Ops[NumVecs+3] = PredReg; - Ops[NumVecs+4] = Chain; + Ops[NumVecs+4] = Pred; + Ops[NumVecs+5] = PredReg; + Ops[NumVecs+6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1208,8 +1224,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1245,8 +1261,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); + Ops.push_back(MemUpdate); + Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1273,12 +1291,12 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 94cd3202e42..53abf340ad0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -392,9 +392,9 @@ def addrmode5 : Operand, // addrmode6 := reg with optional writeback // def addrmode6 : Operand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); } // addrmodepc := pc + reg diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e5e57c00b46..8fee6fa9527 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -227,7 +227,7 @@ class VLD3D op7_4, string OpcodeStr, string Dt> class VLD3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr!", + OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "$addr.addr = $wb", []>; def VLD3d8 : VLD3D<0b0000, "vld3", "8">; @@ -259,7 +259,7 @@ class VLD4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0001,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr!", + OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "$addr.addr = $wb", []>; def VLD4d8 : VLD4D<0b0000, "vld4", "8">; @@ -447,7 +447,7 @@ class VST3D op7_4, string OpcodeStr, string Dt> class VST3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr!", + OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "$addr.addr = $wb", []>; def VST3d8 : VST3D<0b0000, "vst3", "8">; @@ -477,7 +477,7 @@ class VST4D op7_4, string OpcodeStr, string Dt> class VST4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr!", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "$addr.addr = $wb", []>; def VST4d8 : VST4D<0b0000, "vst4", "8">; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 54535724b84..447afdd4fab 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -562,13 +562,22 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) { + if (MO4.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO2.getImm(); + O << ", :" << MO4.getImm(); } O << "]"; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO2.getReg()); + } } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index cf7fb6bf226..c1d68fbdf28 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -268,13 +268,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); - O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) { - // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO2.getImm(); + // FIXME: No support yet for specifying alignment. + O << '[' << getRegisterName(MO1.getReg()) << ']'; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << '!'; + else + O << ", " << getRegisterName(MO2.getReg()); } - O << "]"; } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index c64c5074bb2..d9942c8c840 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST2LNq16a: case ARM::VST2LNq32a: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 2; Offset = 0; Stride = 2; @@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNq16b: case ARM::VST2LNq32b: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 3; return true; case ARM::VST3q8a: case ARM::VST3q16a: case ARM::VST3q32a: - FirstOpnd = 3; + FirstOpnd = 5; NumRegs = 3; Offset = 0; Stride = 2; @@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3q8b: case ARM::VST3q16b: case ARM::VST3q32b: - FirstOpnd = 3; + FirstOpnd = 5; NumRegs = 3; Offset = 1; Stride = 2; @@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16a: case ARM::VST3LNq32a: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; @@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16b: case ARM::VST3LNq32b: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST4q8a: case ARM::VST4q16a: case ARM::VST4q32a: - FirstOpnd = 3; + FirstOpnd = 5; NumRegs = 4; Offset = 0; Stride = 2; @@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4q8b: case ARM::VST4q16b: case ARM::VST4q32b: - FirstOpnd = 3; + FirstOpnd = 5; NumRegs = 4; Offset = 1; Stride = 2; @@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16a: case ARM::VST4LNq32a: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; @@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16b: case ARM::VST4LNq32b: - FirstOpnd = 2; + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2;