From 53519f015e3e84e9f57b677cc8724805a6009b73 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 21 Jan 2011 18:55:51 +0000 Subject: [PATCH] Last round of fixes for movw + movt global address codegen. 1. Fixed ARM pc adjustment. 2. Fixed dynamic-no-pic codegen 3. CSE of pc-relative load of global addresses. It's now enabled by default for Darwin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123991 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 66 ++++++++++++++++--------- lib/Target/ARM/ARMBaseInfo.h | 11 +++++ lib/Target/ARM/ARMBaseInstrInfo.cpp | 22 ++++++--- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 55 +++++++++++++-------- lib/Target/ARM/ARMISelLowering.cpp | 7 ++- lib/Target/ARM/ARMISelLowering.h | 2 + lib/Target/ARM/ARMInstrInfo.td | 24 +++++---- lib/Target/ARM/ARMInstrThumb2.td | 18 ++++--- lib/Target/ARM/ARMSubtarget.cpp | 8 +-- test/CodeGen/ARM/load-global.ll | 50 +++++++++++++++++++ test/CodeGen/ARM/machine-licm.ll | 6 +-- test/CodeGen/ARM/tail-opts.ll | 9 ++-- test/CodeGen/Thumb2/load-global.ll | 23 --------- test/CodeGen/Thumb2/machine-licm.ll | 15 +++--- test/CodeGen/Thumb2/thumb2-ifcvt3.ll | 1 - 15 files changed, 202 insertions(+), 115 deletions(-) create mode 100644 test/CodeGen/ARM/load-global.ll delete mode 100644 test/CodeGen/Thumb2/load-global.ll diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 71a471dac30..ed3023a31de 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -863,25 +863,34 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::MOVi16_pic_ga: - case ARM::t2MOVi16_pic_ga: { + case ARM::MOVi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; - TmpInst.setOpcode(Opc == ARM::MOVi16_pic_ga ? ARM::MOVi16 : ARM::t2MOVi16); + TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + unsigned TF = MI->getOperand(1).getTargetFlags(); + bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(1).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), MI->getOperand(2).getImm(), - OutContext); - const MCExpr *LabelSymExpr = MCSymbolRefExpr::Create(LabelSym, OutContext); - const MCExpr *PCRelExpr = - ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(4, OutContext), + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), OutContext), OutContext), OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } + // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); @@ -890,26 +899,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } - case ARM::MOVTi16_pic_ga: - case ARM::t2MOVTi16_pic_ga: { + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: { MCInst TmpInst; - TmpInst.setOpcode(Opc==ARM::MOVTi16_pic_ga ? ARM::MOVTi16 : ARM::t2MOVTi16); + TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel + ? ARM::MOVTi16 : ARM::t2MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + unsigned TF = MI->getOperand(2).getTargetFlags(); + bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(2).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), MI->getOperand(3).getImm(), - OutContext); - const MCExpr *LabelSymExpr = MCSymbolRefExpr::Create(LabelSym, OutContext); - const MCExpr *PCRelExpr = - ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(4, OutContext), + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), OutContext), OutContext), OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index d3831d2c541..6054bb773e7 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -185,6 +185,17 @@ namespace ARMII { /// higher 16 bit of the address. Used only via movt instruction. MO_HI16, + /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a + /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, + /// i.e. "FOO$non_lazy_ptr". + /// Used only via movw instruction. + MO_LO16_NONLAZY, + + /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a + /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, + /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction. + MO_HI16_NONLAZY, + /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a /// relocation containing lower 16 bit of the PC relative address of the /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL". diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4f44c8ea391..2268e59ea7b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -553,10 +553,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARMII::Size2Bytes: return 2; // Thumb1 instruction. case ARMII::SizeSpecial: { switch (Opc) { - case ARM::MOVi16_pic_ga: - case ARM::MOVTi16_pic_ga: - case ARM::t2MOVi16_pic_ga: - case ARM::t2MOVTi16_pic_ga: + case ARM::MOVi16_ga_pcrel: + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: return 4; case ARM::MOVi32imm: case ARM::t2MOVi32imm: @@ -1051,8 +1051,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || - Opcode == ARM::MOV_pic_ga_add_pc || - Opcode == ARM::t2MOV_pic_ga_add_pc) { + Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1063,8 +1066,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::MOV_pic_ga_add_pc || - Opcode == ARM::t2MOV_pic_ga_add_pc) + if (Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 29d4e1c9d8e..4191d687284 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -832,42 +832,54 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::MOV_pic_ga_add_pc: - case ARM::MOV_pic_ga_ldr: - case ARM::t2MOV_pic_ga_add_pc: { - // Expand into movw + movw + add pc / ldr [pc] + case ARM::MOV_ga_dyn: + case ARM::MOV_ga_pcrel: + case ARM::MOV_ga_pcrel_ldr: + case ARM::t2MOV_ga_dyn: + case ARM::t2MOV_ga_pcrel: { + // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. unsigned LabelId = AFI->createPICLabelUId(); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = Opcode != ARM::t2MOV_pic_ga_add_pc; - unsigned LO16Opc = isARM ? ARM::MOVi16_pic_ga : ARM::t2MOVi16_pic_ga; - unsigned HI16Opc = isARM ? ARM::MOVTi16_pic_ga : ARM::t2MOVTi16_pic_ga; + bool isARM = Opcode != ARM::t2MOV_ga_pcrel; + bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); + unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; + unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; + unsigned LO16TF = isPIC + ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY; + unsigned HI16TF = isPIC + ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY; unsigned PICAddOpc = isARM - ? (Opcode == ARM::MOV_pic_ga_ldr ? ARM::PICLDR : ARM::PICADD) + ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg) - .addGlobalAddress(GV, MO1.getOffset(), - TF | ARMII::MO_LO16_NONLAZY_PIC) - .addImm(LabelId); - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) - .addReg(DstReg) - .addGlobalAddress(GV, MO1.getOffset(), - TF | ARMII::MO_HI16_NONLAZY_PIC) + .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) .addImm(LabelId); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(HI16Opc), DstReg) + .addReg(DstReg) + .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) + .addImm(LabelId); + if (!isPIC) { + TransferImpOps(MI, MIB1, MIB2); + MI.eraseFromParent(); + return true; + } + + MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg).addImm(LabelId); if (isARM) { - AddDefaultPred(MIB2); - if (Opcode == ARM::MOV_pic_ga_ldr) + AddDefaultPred(MIB3); + if (Opcode == ARM::MOV_ga_pcrel_ldr) (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } - TransferImpOps(MI, MIB1, MIB2); + TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); return true; } @@ -1219,9 +1231,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getTarget().getInstrInfo()); - TRI = MF.getTarget().getRegisterInfo(); - STI = &MF.getTarget().getSubtarget(); + const TargetMachine &TM = MF.getTarget(); + TII = static_cast(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); + STI = &TM.getSubtarget(); AFI = MF.getInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a6e1fe79c19..91dca5fd592 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -751,6 +751,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; @@ -1999,11 +2000,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. - if (RelocM != Reloc::PIC_) + if (RelocM == Reloc::Static) return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, + unsigned Wrapper = (RelocM == Reloc::PIC_) + ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; + SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9d105dadad8..28bf60c8c28 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -34,6 +34,8 @@ namespace llvm { Wrapper, // Wrapper - A wrapper node for TargetConstantPool, // TargetExternalSymbol, and TargetGlobalAddress. + WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in + // DYN mode. WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 854fe83fce4..765a714ca7e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -69,8 +69,9 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; -def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; +def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -1943,8 +1944,8 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), let Inst{25} = 1; } -def MOVi16_pic_ga : PseudoInst<(outs GPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; +def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; let Constraints = "$src = $Rd" in { def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), @@ -1963,8 +1964,8 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), let Inst{25} = 1; } -def MOVTi16_pic_ga : PseudoInst<(outs GPR:$Rd), - (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; +def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; } // Constraints @@ -3412,18 +3413,23 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; -// Pseudo instruction that combines movw + movt + add pc. +// Pseudo instruction that combines movw + movt + add pc (if PIC). // It also makes it possible to rematerialize the instructions. // FIXME: Remove this when we can do generalized remat and when machine licm // can properly the instructions. let isReMaterializable = 1 in { -def MOV_pic_ga_add_pc : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2addpc, +def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsARM, UseMovt]>; +def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovt]>; + let AddedComplexity = 10 in -def MOV_pic_ga_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), +def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), IIC_iMOVix2ld, [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, Requires<[IsARM, UseMovt]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6384461de81..55802bdc49b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1701,7 +1701,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, let Inst{7-0} = imm{7-0}; } -def t2MOVi16_pic_ga : PseudoInst<(outs rGPR:$Rd), +def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; let Constraints = "$src = $Rd" in { @@ -1726,7 +1726,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), let Inst{7-0} = imm{7-0}; } -def t2MOVTi16_pic_ga : PseudoInst<(outs rGPR:$Rd), +def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; } // Constraints @@ -3253,16 +3253,22 @@ def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set rGPR:$dst, (i32 imm:$src))]>, Requires<[IsThumb, HasV6T2]>; -// Pseudo instruction that combines movw + movt + add pc. +// Pseudo instruction that combines movw + movt + add pc (if pic). // It also makes it possible to rematerialize the instructions. // FIXME: Remove this when we can do generalized remat and when machine licm // can properly the instructions. -let isReMaterializable = 1 in -def t2MOV_pic_ga_add_pc : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2addpc, +let isReMaterializable = 1 in { +def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsThumb2, UseMovt]>; +def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, + Requires<[IsThumb2, UseMovt]>; +} + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, Requires<[IsThumb2, DontUseMovt]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 366af55369f..0bd740cfb28 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -25,7 +25,7 @@ ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); static cl::opt -UseMOVT("arm-darwin-use-movt", cl::init(false), cl::Hidden); +DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); static cl::opt StrictAlign("arm-strict-align", cl::Hidden, @@ -150,11 +150,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, UseMovt = hasV6T2Ops(); else { IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); - if (UseMOVT && hasV6T2Ops()) { - unsigned Maj, Min, Rev; - TargetTriple.getDarwinNumber(Maj, Min, Rev); - UseMovt = Maj > 4; - } + UseMovt = DarwinUseMOVT && hasV6T2Ops(); } if (!isThumb() || hasThumb2()) diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll new file mode 100644 index 00000000000..15a415df731 --- /dev/null +++ b/test/CodeGen/ARM/load-global.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_T +; RUN: llc < %s -mtriple=armv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_V7 +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX + +@G = external global i32 + +define i32 @test1() { +; STATIC: _test1: +; STATIC: ldr r0, LCPI0_0 +; STATIC: ldr r0, [r0] +; STATIC: .long _G + +; DYNAMIC: _test1: +; DYNAMIC: ldr r0, LCPI0_0 +; DYNAMIC: ldr r0, [r0] +; DYNAMIC: ldr r0, [r0] +; DYNAMIC: .long L_G$non_lazy_ptr + +; PIC: _test1 +; PIC: ldr r0, LCPI0_0 +; PIC: ldr r0, [pc, r0] +; PIC: ldr r0, [r0] +; PIC: .long L_G$non_lazy_ptr-(LPC0_0+8) + +; PIC_T: _test1 +; PIC_T: ldr.n r0, LCPI0_0 +; PIC_T: add r0, pc +; PIC_T: ldr r0, [r0] +; PIC_T: ldr r0, [r0] +; PIC_T: .long L_G$non_lazy_ptr-(LPC0_0+4) + +; PIC_V7: _test1 +; PIC_V7: movw r0, :lower16:(L_G$non_lazy_ptr-(LPC0_0+8)) +; PIC_V7: movt r0, :upper16:(L_G$non_lazy_ptr-(LPC0_0+8)) +; PIC_V7: ldr r0, [pc, r0] +; PIC_V7: ldr r0, [r0] + +; LINUX: test1 +; LINUX: ldr r0, .LCPI0_0 +; LINUX: ldr r1, .LCPI0_1 +; LINUX: add r0, pc, r0 +; LINUX: ldr r0, [r1, r0] +; LINUX: ldr r0, [r0] +; LINUX: .long G(GOT) + %tmp = load i32* @G + ret i32 %tmp +} diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll index a0494134f06..8656c5bbd72 100644 --- a/test/CodeGen/ARM/machine-licm.ll +++ b/test/CodeGen/ARM/machine-licm.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=armv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -arm-darwin-use-movt | FileCheck %s -check-prefix=MOVT +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6t2 | FileCheck %s -check-prefix=MOVT ; rdar://7353541 ; rdar://7354376 ; rdar://8887598 @@ -24,8 +24,8 @@ entry: ; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}] ; MOVT: t: -; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4)) -; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4)) +; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+8)) +; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+8)) ; MOVT: LPC0_0: ; MOVT: ldr r{{[0-9]+}}, [pc, [[REGISTER_2]]] ; MOVT: ldr r{{[0-9]+}}, [r{{[0-9]+}}] diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll index 17c8baedbfa..5b3dce386bb 100644 --- a/test/CodeGen/ARM/tail-opts.ll +++ b/test/CodeGen/ARM/tail-opts.ll @@ -17,13 +17,16 @@ declare i8* @choose(i8*, i8*) ; CHECK: tail_duplicate_me: ; CHECK: qux ; CHECK: qux -; CHECK: ldr r{{.}}, LCPI +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r ; CHECK-NEXT: bx r -; CHECK: ldr r{{.}}, LCPI +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r ; CHECK-NEXT: bx r -; CHECK: ldr r{{.}}, LCPI +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r ; CHECK-NEXT: bx r diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll deleted file mode 100644 index 46e053ca4ea..00000000000 --- a/test/CodeGen/Thumb2/load-global.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX - -@G = external global i32 - -define i32 @test1() { -; STATIC: _test1: -; STATIC: .long _G - -; DYNAMIC: _test1: -; DYNAMIC: .long L_G$non_lazy_ptr - -; PIC: _test1 -; PIC: add r0, pc -; PIC: .long L_G$non_lazy_ptr-(LPC0_0+4) - -; LINUX: test1 -; LINUX: .long G(GOT) - %tmp = load i32* @G - ret i32 %tmp -} diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 37a15ff7b52..5e776dd8937 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -3,9 +3,6 @@ ; rdar://7353541 ; rdar://7354376 -; The generated code is no where near ideal. It's not recognizing the two -; constantpool entries being loaded can be merged into one. - @GV = external global i32 ; [#uses=2] define void @t1(i32* nocapture %vals, i32 %c) nounwind { @@ -17,21 +14,21 @@ entry: bb.nph: ; preds = %entry ; CHECK: BB#1 -; CHECK: ldr.n r2, LCPI0_0 +; CHECK: movw r2, :lower16:L_GV$non_lazy_ptr +; CHECK: movt r2, :upper16:L_GV$non_lazy_ptr ; CHECK: ldr r2, [r2] ; CHECK: ldr r3, [r2] ; CHECK: LBB0_2 -; CHECK: LCPI0_0: -; CHECK-NOT: LCPI0_1: +; CHECK-NOT: LCPI0_0: ; PIC: BB#1 -; PIC: ldr.n r2, LCPI0_0 +; PIC: movw r2, :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4)) +; PIC: movt r2, :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4)) ; PIC: add r2, pc ; PIC: ldr r2, [r2] ; PIC: ldr r3, [r2] ; PIC: LBB0_2 -; PIC: LCPI0_0: -; PIC-NOT: LCPI0_1: +; PIC-NOT: LCPI0_0: ; PIC: .section %.pre = load i32* @GV, align 4 ; [#uses=1] br label %bb diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll index cc2ef140d11..bcf10eff729 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll @@ -23,7 +23,6 @@ bb52: ; preds = %newFuncRoot ; CHECK: movne ; CHECK: moveq ; CHECK: pop -; CHECK-NEXT: @ BB#1: %0 = load i64* @posed, align 4 ; [#uses=3] %1 = sub i64 %0, %.reload78 ; [#uses=1] %2 = ashr i64 %1, 1 ; [#uses=3]