Use movw+movt in ARMFastISel::ARMMaterializeGV.

This eliminates a lot of constant pool entries for -O0 builds of code
with many global variable accesses.

This speeds up -O0 codegen of consumer-typeset by 2x because the
constant island pass no longer has to look at thousands of constant pool
entries.

<rdar://problem/10629774>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147712 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen 2012-01-07 01:47:05 +00:00
parent f231a6dc7f
commit 45ca7c6336
3 changed files with 78 additions and 43 deletions

View File

@@ -617,40 +617,61 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
// TODO: Need more magic for ARM PIC.
if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(GV->getType());
if (Align == 0) {
// TODO: Figure out if this is correct.
Align = TD.getTypeAllocSize(GV->getType());
}
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
unsigned Id = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
ARMCP::CPValue,
PCAdj);
unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
// Load value.
MachineInstrBuilder MIB;
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2) {
unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx);
if (RelocM == Reloc::PIC_)
MIB.addImm(Id);
// Use movw+movt when possible, it avoids constant pool entries.
if (Subtarget->isTargetDarwin() && Subtarget->useMovt()) {
unsigned Opc;
switch (RelocM) {
case Reloc::PIC_:
Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
break;
case Reloc::DynamicNoPIC:
Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
break;
default:
Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
break;
}
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
DestReg).addGlobalAddress(GV));
} else {
// The extra immediate is for addrmode2.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
.addImm(0);
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(GV->getType());
if (Align == 0) {
// TODO: Figure out if this is correct.
Align = TD.getTypeAllocSize(GV->getType());
}
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
(Subtarget->isThumb() ? 4 : 8);
unsigned Id = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
ARMCP::CPValue,
PCAdj);
unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
// Load value.
MachineInstrBuilder MIB;
if (isThumb2) {
unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx);
if (RelocM == Reloc::PIC_)
MIB.addImm(Id);
} else {
// The extra immediate is for addrmode2.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
.addImm(0);
}
AddOptionalDefs(MIB);
}
AddOptionalDefs(MIB);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,

View File

@@ -6,14 +6,16 @@
define void @t1() nounwind ssp {
; ARM: t1
; ARM: ldr r0, LCPI0_0
; ARM: movw r0, :lower16:_message1
; ARM: movt r0, :upper16:_message1
; ARM: add r0, r0, #5
; ARM: movw r1, #64
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl _memset
; THUMB: t1
; THUMB: ldr.n r0, LCPI0_0
; THUMB: movw r0, :lower16:_message1
; THUMB: movt r0, :upper16:_message1
; THUMB: adds r0, #5
; THUMB: movs r1, #64
; THUMB: movt r1, #0
@@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @t2() nounwind ssp {
; ARM: t2
; ARM: ldr r0, LCPI1_0
; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
@@ -39,7 +42,8 @@ define void @t2() nounwind ssp {
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl _memcpy
; THUMB: t2
; THUMB: ldr.n r0, LCPI1_0
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
@@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
define void @t3() nounwind ssp {
; ARM: t3
; ARM: ldr r0, LCPI2_0
; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
@@ -63,7 +68,8 @@ define void @t3() nounwind ssp {
; ARM: mov r0, r1
; ARM: bl _memmove
; THUMB: t3
; THUMB: ldr.n r0, LCPI2_0
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
@@ -77,9 +83,11 @@ define void @t3() nounwind ssp {
define void @t4() nounwind ssp {
; ARM: t4
; ARM: ldr r0, LCPI3_0
; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: ldr r1, LCPI3_1
; ARM: movw r1, :lower16:L_temp$non_lazy_ptr
; ARM: movt r1, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r1, [r1]
; ARM: ldr r2, [r1, #16]
; ARM: str r2, [r0, #4]
@@ -88,9 +96,11 @@ define void @t4() nounwind ssp {
; ARM: ldrh r1, [r1, #24]
; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB: ldr.n r0, LCPI3_0
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: ldr.n r1, LCPI3_1
; THUMB: movw r1, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r1, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r1, [r1]
; THUMB: ldr r2, [r1, #16]
; THUMB: str r2, [r0, #4]

View File

@@ -142,19 +142,23 @@ define void @test4() {
store i32 %b, i32* @test4g
ret void
; THUMB: ldr.n r0, LCPI4_1
; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: ldr r0, [r0]
; THUMB: adds r0, #1
; THUMB: ldr.n r1, LCPI4_0
; THUMB: movw r1, :lower16:L_test4g$non_lazy_ptr
; THUMB: movt r1, :upper16:L_test4g$non_lazy_ptr
; THUMB: ldr r1, [r1]
; THUMB: str r0, [r1]
; ARM: ldr r0, LCPI4_1
; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: ldr r0, [r0]
; ARM: add r0, r0, #1
; ARM: ldr r1, LCPI4_0
; ARM: movw r1, :lower16:L_test4g$non_lazy_ptr
; ARM: movt r1, :upper16:L_test4g$non_lazy_ptr
; ARM: ldr r1, [r1]
; ARM: str r0, [r1]
}