diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9c5f3aab9b8..911b84dc7de 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -688,7 +688,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); MachineInstr *NewMI = NULL; - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { + if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { // FIXME: tMOVgpr2gpr etc.? // If it is updating CPSR, then it cannot be folded. if (MI->getOperand(4).getReg() != ARM::CPSR || MI->getOperand(4).isDead()) { unsigned Pred = MI->getOperand(2).getImm(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 97b9ad1dc3b..a72e9dd3bd6 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1289,11 +1289,12 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getDPRCalleeSavedAreaOffset()|| hasFP(MF)) { if (NumBytes) { - unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri; - BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP) - .addReg(FramePtr) - .addImm(NumBytes) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); } else { // Thumb2 or ARM. unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index d996b24ddf0..04f4dd3ca49 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -122,6 +122,8 @@ private: SDNode *SelectARMIndexedLoad(SDValue Op); SDNode *SelectT2IndexedLoad(SDValue Op); + /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. + SDNode *SelectDYN_ALLOC(SDValue Op); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -868,6 +870,59 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { return NULL; } +SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + MVT VT = Op.getValueType(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); + int32_t AlignVal = cast(Align)->getSExtValue(); + if (AlignVal < 0) + // We need to align the stack. Use Thumb1 tAND which is the only thumb + // instruction that can read and write SP. This matches to a pseudo + // instruction that has a chain to ensure the result is written back to + // the stack pointer. + SP = SDValue(CurDAG->getTargetNode(ARM::tANDsp, dl, VT, SP, Align), 0); + + bool isC = isa(Size); + uint32_t C = isC ? cast(Size)->getZExtValue() : ~0UL; + // Handle the most common case for both Thumb1 and Thumb2: + // tSUBspi - immediate is between 0 ... 508 inclusive. + if (C <= 508 && ((C & 3) == 0)) + // FIXME: tSUBspi encode scale 4 implicitly. + return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, + CurDAG->getTargetConstant(C/4, MVT::i32), + Chain); + + if (Subtarget->isThumb1Only()) { + // Use tADDrSPr since Thumb1 does not have a sub r, sp, r. ARMISelLowering + // should have negated the size operand already. FIXME: We can't insert + // new target independent node at this stage so we are forced to negate + // it earlier. Is there a better solution? + return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, + Chain); + } else if (Subtarget->isThumb2()) { + if (isC && Predicate_t2_so_imm(Size.getNode())) { + // t2SUBrSPi + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); + } else if (isC && Predicate_imm0_4095(Size.getNode())) { + // t2SUBrSPi12 + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); + } else { + // t2SUBrSPs + SDValue Ops[] = { SP, Size, + getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); + } + } + + // FIXME: Add ADD / SUB sp instructions for ARM. + return 0; +} SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); @@ -941,25 +996,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ISD::ADD: { - if (!Subtarget->isThumb1Only()) - break; - // Select add sp, c to tADDhirr. - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - RegisterSDNode *LHSR = dyn_cast(Op.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(Op.getOperand(1)); - if (LHSR && LHSR->getReg() == ARM::SP) { - std::swap(N0, N1); - std::swap(LHSR, RHSR); - } - if (RHSR && RHSR->getReg() == ARM::SP) { - SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVtgpr2gpr, dl, - Op.getValueType(), N0, N0),0); - return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); - } - break; - } + case ARMISD::DYN_ALLOC: + return SelectDYN_ALLOC(Op); case ISD::MUL: if (Subtarget->isThumb1Only()) break; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0bfe213cf04..4922f7d12e7 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -307,7 +307,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->isThumb()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -435,6 +438,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -1398,6 +1403,53 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + MVT VT = Node->getValueType(0); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + unsigned AlignVal = cast(Align)->getZExtValue(); + unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); + if (AlignVal > StackAlign) + // Do this now since selection pass cannot introduce new target + // independent node. + Align = DAG.getConstant(-(uint64_t)AlignVal, VT); + + // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up + // using a "add r, sp, r" instead. Negate the size now so we don't have to + // do even more horrible hack later. + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + if (AFI->isThumb1OnlyFunction()) { + bool Negate = true; + ConstantSDNode *C = dyn_cast(Size); + if (C) { + uint32_t Val = C->getZExtValue(); + if (Val <= 508 && ((Val & 3) == 0)) + Negate = false; + } + if (Negate) + Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); + } + + SDVTList VTList = DAG.getVTList(VT, MVT::Other); + SDValue Ops1[] = { Chain, Size, Align }; + SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); + Chain = Res.getValue(1); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + SDValue Ops2[] = { Res, Chain }; + return DAG.getMergeValues(Ops2, 2, dl); +} + SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, @@ -2396,6 +2448,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -2454,7 +2507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case ARM::tMOVCCr: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -2509,6 +2563,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } + + case ARM::tANDsp: + case ARM::tADDspr_: + case ARM::tSUBspi_: + case ARM::t2SUBrSPi_: + case ARM::t2SUBrSPi12_: + case ARM::t2SUBrSPs_: { + MachineFunction *MF = BB->getParent(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + bool DstIsDead = MI->getOperand(0).isDead(); + bool SrcIsKill = MI->getOperand(1).isKill(); + + if (SrcReg != ARM::SP) { + // Copy the source to SP from virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + } + + unsigned OpOpc = 0; + bool NeedPred = false, NeedCC = false, NeedOp3 = false; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unexpected pseudo instruction!"); + case ARM::tANDsp: + OpOpc = ARM::tAND; + NeedPred = true; + break; + case ARM::tADDspr_: + OpOpc = ARM::tADDspr; + break; + case ARM::tSUBspi_: + OpOpc = ARM::tSUBspi; + break; + case ARM::t2SUBrSPi_: + OpOpc = ARM::t2SUBrSPi; + NeedPred = true; NeedCC = true; + break; + case ARM::t2SUBrSPi12_: + OpOpc = ARM::t2SUBrSPi12; + NeedPred = true; + break; + case ARM::t2SUBrSPs_: + OpOpc = ARM::t2SUBrSPs; + NeedPred = true; NeedCC = true; NeedOp3 = true; + break; + } + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + if (OpOpc == ARM::tAND) + AddDefaultT1CC(MIB); + MIB.addReg(ARM::SP); + MIB.addOperand(MI->getOperand(2)); + if (NeedOp3) + MIB.addOperand(MI->getOperand(3)); + if (NeedPred) + AddDefaultPred(MIB); + if (NeedCC) + AddDefaultCC(MIB); + + // Copy the result from SP to virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(ARM::SP); + MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4fe4d8bf943..4649c18de8d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -65,11 +65,13 @@ namespace llvm { FMRRD, // double to two gprs. FMDRR, // Two gprs to double. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. THREAD_POINTER, + DYN_ALLOC, // Dynamic allocation on the stack. + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -255,6 +257,7 @@ namespace llvm { SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 5dfc4fc25a8..9b54e67bedf 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -138,18 +138,37 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALU, "add $dst, pc, $rhs * 4", []>; // ADD rd, sp, #imm8 -// FIXME: hard code sp? def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALU, "add $dst, $sp, $rhs * 4 @ addrspi", []>; // ADD sp, sp, #imm7 -// FIXME: hard code sp? -def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, "add $dst, $rhs * 4", []>; -// FIXME: Make use of the following? +// SUB sp, sp, #imm7 +def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, + "sub $dst, $rhs * 4", []>; + // ADD rm, sp, rm +def tADDrSPr : TI<(outs GPR:$dst), (ins GPR:$sp, GPR:$rhs), IIC_iALU, + "add $dst, $sp, $rhs", []>; + // ADD sp, rm +def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALU, + "add $dst, $rhs", []>; + +// Pseudo instruction that will expand into a tSUBspi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + NoItinerary, "@ sub $dst, $rhs * 4", []>; + +def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + NoItinerary, "@ add $dst, $rhs", []>; + +let Defs = [CPSR] in +def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + NoItinerary, "@ and $dst, $rhs", []>; +} // usesCustomDAGSchedInserter //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -549,9 +568,6 @@ def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, // TODO: A7-96: STMIA - store multiple. -def tSUBspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, - "sub $dst, $rhs * 4", []>; - // sign-extend byte def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALU, "sxtb", " $dst, $src", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9305c8ad6b6..11b0454802c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{ }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : PatLeaf<(i32 imm), [{ +def imm0_4095 : Operand, + PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 4096; }]>; @@ -239,7 +240,7 @@ multiclass T2I_bin_ii12rs { opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // 12-bit imm - def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, + def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALU, !strconcat(opc, "w"), " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>; // register @@ -431,6 +432,39 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p), IIC_iALU, "adr$p.w $dst, #${label}_${id:no_hash}", []>; + +// ADD r, sp, {so_imm|i12} +def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU, + "add", ".w $dst, $sp, $imm", []>; +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU, + "addw", " $dst, $sp, $imm", []>; + +// ADD r, sp, so_reg +def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU, + "add", ".w $dst, $sp, $rhs", []>; + +// SUB r, sp, {so_imm|i12} +def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU, + "sub", ".w $dst, $sp, $imm", []>; +def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU, + "subw", " $dst, $sp, $imm", []>; + +// SUB r, sp, so_reg +def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU, + "sub", " $dst, $sp, $rhs", []>; + + +// Pseudo instruction that will expand into a t2SUBrSPi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), + NoItinerary, "@ sub.w $dst, $sp, $imm", []>; +def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), + NoItinerary, "@ subw $dst, $sp, $imm", []>; +def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), + NoItinerary, "@ sub $dst, $sp, $rhs", []>; +} // usesCustomDAGSchedInserter + + //===----------------------------------------------------------------------===// // Load / store Instructions. // diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a81b790f9dc..ea80e47589d 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -206,9 +206,13 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; + int BaseOpc = !isThumb2 + ? ARM::ADDri + : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); if (Offset < 0) { - BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri; + BaseOpc = !isThumb2 + ? ARM::SUBri + : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); Offset = - Offset; } int ImmedOffset = isThumb2 @@ -329,6 +333,9 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2SUBri && + MI->getOpcode() != ARM::t2SUBrSPi && + MI->getOpcode() != ARM::t2SUBrSPi12 && + MI->getOpcode() != ARM::tSUBspi && MI->getOpcode() != ARM::SUBri) return false; @@ -336,9 +343,10 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes <= 0 || (Limit && Bytes >= Limit)) return false; + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - MI->getOperand(2).getImm() == Bytes && + (MI->getOperand(2).getImm()*Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } @@ -350,6 +358,9 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2ADDri && + MI->getOpcode() != ARM::t2ADDrSPi && + MI->getOpcode() != ARM::t2ADDrSPi12 && + MI->getOpcode() != ARM::tADDspi && MI->getOpcode() != ARM::ADDri) return false; @@ -357,9 +368,10 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; + unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - MI->getOperand(2).getImm() == Bytes && + (MI->getOperand(2).getImm()*Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt index 651d39311e0..48b52781228 100644 --- a/lib/Target/ARM/README-Thumb2.txt +++ b/lib/Target/ARM/README-Thumb2.txt @@ -1,3 +1,7 @@ //===---------------------------------------------------------------------===// // Random ideas for the ARM backend (Thumb2 specific). //===---------------------------------------------------------------------===// + +We should be using ADD / SUB rd, sp, rm instructions. + +copyRegToReg should use tMOVgpr2gpr instead of t2MOVr? diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index cf2d09912d4..2b329e03db3 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -65,11 +65,15 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (DestRC == ARM::GPRRegisterClass && SrcRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr), - DestReg).addReg(SrcReg))); + // FIXME: Just use tMOVgpr2gpr since it's shorter? + if (SrcReg == ARM::SP || DestReg == ARM::SP) + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr), + DestReg).addReg(SrcReg))); return true; } else if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::tGPRRegisterClass) { + SrcRC == ARM::tGPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); return true; } else if (DestRC == ARM::tGPRRegisterClass && @@ -162,26 +166,62 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } while (NumBytes) { - unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; unsigned ThisVal = NumBytes; - if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { - NumBytes = 0; - } else if (ThisVal < 4096) { - Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; - NumBytes = 0; + unsigned Opc = 0; + if (DestReg == ARM::SP && BaseReg != ARM::SP) { + // mov sp, rn. Note t2MOVr cannot be used. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg); + BaseReg = ARM::SP; + continue; + } + + if (BaseReg == ARM::SP) { + // sub sp, sp, #imm7 + if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { + assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + // FIXME: Fix Thumb1 immediate encoding. + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg).addImm(ThisVal/4); + NumBytes = 0; + continue; + } + + // sub rd, sp, so_imm + Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } } else { - // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = CountLeadingZeros_32(ThisVal); - ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); - NumBytes &= ~ThisVal; - assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && - "Bit extraction didn't work?"); + assert(DestReg != ARM::SP && BaseReg != ARM::SP); + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else if (ThisVal < 4096) { + Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } } // Build the new ADD / SUB. - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal))); + BaseReg = DestReg; } } @@ -288,7 +328,6 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - unsigned NewOpc = Opcode; const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -299,9 +338,12 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); + + bool isSP = FrameReg == ARM::SP; if (Offset == 0) { // Turn it into a move. - MI.setDesc(TII.get(ARM::t2MOVr)); + unsigned NewOpc = isSP ? ARM::tMOVgpr2gpr : ARM::t2MOVr; + MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.RemoveOperand(FrameRegIdx+1); return 0; @@ -310,23 +352,23 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(ARM::t2SUBri)); + MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri)); + } else { + MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri)); } // Common case: small offset, fits into instruction. if (ARM_AM::getT2SOImmVal(Offset) != -1) { - NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri; - if (NewOpc != Opcode) - MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); return 0; } // Another common case: imm12. if (Offset < 4096) { - NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; - if (NewOpc != Opcode) - MI.setDesc(TII.get(NewOpc)); + unsigned NewOpc = isSP + ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12) + : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12); + MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); return 0; @@ -346,7 +388,7 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } else { // AddrModeT2_so cannot handle any offset. If there is no offset // register then we change to an immediate version. - NewOpc = Opcode; + unsigned NewOpc = Opcode; if (AddrMode == ARMII::AddrModeT2_so) { unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); if (OffsetReg != 0) { diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll new file mode 100644 index 00000000000..bdc23bab7c8 --- /dev/null +++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s +; PR4659 +; PR4682 + +define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind { +entry: +; CHECK: __gcov_execlp: +; CHECK: mov sp, r7 +; CHECK: sub sp, #1 * 4 + call arm_aapcscc void @__gcov_flush() nounwind + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %bb, %entry + %0 = alloca i8*, i32 undef, align 4 ; [#uses=1] + %1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; [#uses=1] + ret i32 %1 +} + +declare hidden arm_aapcscc void @__gcov_flush() + +declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll index 60604f020ff..d183da44c12 100644 --- a/test/CodeGen/Thumb2/large-stack.ll +++ b/test/CodeGen/Thumb2/large-stack.ll @@ -2,7 +2,7 @@ define void @test1() { ; CHECK: test1: -; CHECK: sub.w sp, sp, #256 +; CHECK: sub sp, #64 * 4 %tmp = alloca [ 64 x i32 ] , align 4 ret void } @@ -10,7 +10,7 @@ define void @test1() { define void @test2() { ; CHECK: test2: ; CHECK: sub.w sp, sp, #4160 -; CHECK: sub.w sp, sp, #8 +; CHECK: sub sp, #2 * 4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } @@ -18,7 +18,7 @@ define void @test2() { define i32 @test3() { ; CHECK: test3: ; CHECK: sub.w sp, sp, #805306368 -; CHECK: sub.w sp, sp, #16 +; CHECK: sub sp, #4 * 4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16