From 226036ee731a2041f37f28f958d2b6a50373f4f4 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Sat, 20 Mar 2010 22:13:40 +0000 Subject: [PATCH] Re-commit r98683 ("remove redundant writeback flag from ARM address mode 6") with changes to add a separate optional register update argument. Change all the NEON instructions with address register writeback to use it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAddressingModes.h | 20 +--- lib/Target/ARM/ARMBaseInstrInfo.cpp | 7 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 88 ++++++--------- lib/Target/ARM/ARMInstrInfo.td | 11 +- lib/Target/ARM/ARMInstrNEON.td | 112 +++++++++---------- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 20 ++-- lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 23 ++-- lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 1 + lib/Target/ARM/NEONPreAllocPass.cpp | 28 ++--- 9 files changed, 145 insertions(+), 165 deletions(-) diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 21b57752270..ea62c3330e6 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -524,23 +524,11 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback and alignment + // addrmode6 := reg with optional alignment // - // This is stored in four operands [regaddr, regupdate, opc, align]. The - // first is the address register. The second register holds the value of - // a post-access increment for writeback or reg0 if no writeback or if the - // writeback increment is the size of the memory access. The third - // operand encodes whether there is writeback to the address register. The - // fourth operand is the value of the alignment specifier to use or zero if - // no explicit alignment. - - static inline unsigned getAM6Opc(bool WB = false) { - return (int)WB; - } - - static inline bool getAM6WBFlag(unsigned Mode) { - return Mode & 1; - } + // This is stored in two operands [regaddr, align]. The first is the + // address register. The second operand is the value of the alignment + // specifier to use or zero if no explicit alignment. } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5132dd6464a..e6ea03aa4aa 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -737,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -790,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 0ddf5a0ecab..71207c811e4 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,8 +80,7 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align) { + SDValue &Addr, SDValue &Align) { Addr = N; - // Default to no writeback. - Update = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); + const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + Ops.push_back(Reg0); // post-access address offset // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Pred; - Ops[NumVecs+5] = PredReg; - Ops[NumVecs+6] = Chain; + Ops[NumVecs+5] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index a70d6dfe815..26a280697b9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -285,7 +285,7 @@ def pclabel : Operand { // shifter_operand operands: so_reg and so_imm. def so_reg : Operand, // reg reg imm - ComplexPattern { let PrintMethod = "printSORegOperand"; let MIOperandInfo = (ops GPR, GPR, i32imm); @@ -395,9 +395,14 @@ def addrmode5 : Operand, // addrmode6 := reg with optional writeback // def addrmode6 : Operand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); +} + +def am6offset : Operand { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); } // addrmodepc := pc + reg diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 6b68c383eca..c977cc3f5d6 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -164,13 +164,13 @@ let mayLoad = 1 in { // ...with address register writeback: class VLD1DWB op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "\\{$dst\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr$offset", "$addr.addr = $wb", []>; class VLD1QWB op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "${dst:dregpair}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>; def VLD1d8_UPD : VLD1DWB<0b0000, "8">; @@ -211,14 +211,14 @@ def VLD1d32Q : VLD1D4<0b1000, "32">; // ...with address register writeback: class VLD1D3WB op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr", "$addr.addr = $wb", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", [/* For disassembly only; pattern left blank */]>; class VLD1D4WB op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "$addr.addr = $wb", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", [/* For disassembly only; pattern left blank */]>; def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; @@ -256,14 +256,14 @@ def VLD2q32 : VLD2Q<0b1000, "32">; // ...with address register writeback: class VLD2DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", "$addr.addr = $wb", []>; class VLD2QWB op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", []>; def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; @@ -271,8 +271,8 @@ def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", "$addr.addr = $wb", []>; def VLD2q8_UPD : VLD2QWB<0b0000, "8">; @@ -305,8 +305,8 @@ def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, class VLD3DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD3, - "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; @@ -314,8 +314,8 @@ def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): @@ -351,8 +351,8 @@ def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, class VLD4DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD4, - "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", []>; def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; @@ -361,9 +361,9 @@ def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD1, + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64", - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): @@ -404,9 +404,9 @@ def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } // ...with address register writeback: class VLD2LNWB op11_8, string Dt> : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, - "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; @@ -440,10 +440,10 @@ def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } class VLD3LNWB op11_8, string Dt> : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3, "vld3", Dt, - "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", []>; @@ -479,10 +479,10 @@ def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } class VLD4LNWB op11_8, string Dt> : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4, "vld4", Dt, -"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", +"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", []>; @@ -529,12 +529,12 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // ...with address register writeback: class VST1DWB op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src), IIC_VST, - "vst1", Dt, "\\{$src\\}, $addr", "$addr.addr = $wb", []>; + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; class VST1QWB op7_4, string Dt> : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, QPR:$src), IIC_VST, - "vst1", Dt, "${src:dregpair}, $addr", "$addr.addr = $wb", []>; + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; def VST1d8_UPD : VST1DWB<0b0000, "8">; def VST1d16_UPD : VST1DWB<0b0100, "16">; @@ -571,16 +571,16 @@ def VST1d32Q : VST1D4<0b1000, "32">; // ...with address register writeback: class VST1D3WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", [/* For disassembly only; pattern left blank */]>; class VST1D4WB op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", [/* For disassembly only; pattern left blank */]>; @@ -619,23 +619,23 @@ def VST2q32 : VST2Q<0b1000, "32">; // ...with address register writeback: class VST2DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), - IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", "$addr.addr = $wb", []>; class VST2QWB op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr", + "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", "$addr.addr = $wb", []>; def VST2q8_UPD : VST2QWB<0b0000, "8">; @@ -667,18 +667,18 @@ def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", + "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): @@ -713,19 +713,19 @@ def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", "64", - "\\{$src1, $src2, $src3, $src4\\}, $addr", + "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): @@ -766,9 +766,9 @@ def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } // ...with address register writeback: class VST2LNWB op11_8, string Dt> : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr", + "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; @@ -800,10 +800,10 @@ def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } // ...with address register writeback: class VST3LNWB op11_8, string Dt> : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST, "vst3", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } @@ -836,10 +836,10 @@ def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } // ...with address register writeback: class VST4LNWB op11_8, string Dt> : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), - (ins addrmode6:$addr, + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST, "vst4", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", + "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index ce7c488f5c0..4a7a1e43bd3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum); void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); @@ -562,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO4.getImm()) { + if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO4.getImm(); + O << ", :" << MO2.getImm(); } O << "]"; +} - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO2.getReg()); - } +void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO = MI->getOperand(Op); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 33db90eb15a..30763a9952e 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -428,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - // FIXME: No support yet for specifying alignment. - O << '[' << getRegisterName(MO1.getReg()) << ']'; - - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << '!'; - else - O << ", " << getRegisterName(MO2.getReg()); + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO2.getImm(); } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index ceb641d95bb..d41b5dfa6c5 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -48,6 +48,7 @@ public: void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum); void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index b010c5a53f2..c36fe63b069 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; case ARM::VST2LNq16: case ARM::VST2LNq32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; Offset = 0; Stride = 2; @@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNq16odd: case ARM::VST2LNq32odd: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; return true; case ARM::VST3q8_UPD: case ARM::VST3q16_UPD: case ARM::VST3q32_UPD: - FirstOpnd = 5; + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; @@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3q8odd_UPD: case ARM::VST3q16odd_UPD: case ARM::VST3q32odd_UPD: - FirstOpnd = 5; + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; @@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16: case ARM::VST3LNq32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; Offset = 0; Stride = 2; @@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16odd: case ARM::VST3LNq32odd: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; case ARM::VST4q8_UPD: case ARM::VST4q16_UPD: case ARM::VST4q32_UPD: - FirstOpnd = 5; + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; @@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4q8odd_UPD: case ARM::VST4q16odd_UPD: case ARM::VST4q32odd_UPD: - FirstOpnd = 5; + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2; @@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16: case ARM::VST4LNq32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; Offset = 0; Stride = 2; @@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16odd: case ARM::VST4LNq32odd: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; Offset = 1; Stride = 2;