From 31b99dd76035067b1c3cc6c7e9d663b7b0210938 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 14 Aug 2009 18:31:44 +0000 Subject: [PATCH] Also shrink immediate branches; also more assembler workarounds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMConstantIslandPass.cpp | 61 +++++++++++++++++++++--- lib/Target/ARM/ARMInstrThumb.td | 9 ++-- test/CodeGen/Thumb2/thumb2-branch.ll | 61 ++++++++++++++++++++++++ test/CodeGen/Thumb2/tls2.ll | 22 ++++++--- 4 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/Thumb2/thumb2-branch.ll diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ea64bab79d1..f29247ef0cd 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -38,6 +38,7 @@ STATISTIC(NumCBrFixed, "Number of cond branches fixed"); STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); STATISTIC(NumTBs, "Number of table branches generated"); STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); +STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -479,8 +480,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, bool NegOk = false; bool IsSoImm = false; - // FIXME: Temporary workaround until I can figure out what's going on. - unsigned Slack = T2JumpTables.empty() ? 0 : 4; switch (Opc) { default: llvm_unreachable("Unknown addressing mode for CP reference!"); @@ -530,7 +529,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, // Remember that this is a user of a CP entry. 
unsigned CPI = I->getOperand(op).getIndex(); MachineInstr *CPEMI = CPEMIs[CPI]; - unsigned MaxOffs = ((1 << Bits)-1) * Scale - Slack; + unsigned MaxOffs = ((1 << Bits)-1) * Scale; CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); // Increment corresponding CPEntry reference count. @@ -714,11 +713,23 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, // purposes of the displacement computation; compensate for that here. // Effectively, the valid range of displacements is 2 bytes smaller for such // references. - if (isThumb && UserOffset%4 !=0) + unsigned TotalAdj = 0; + if (isThumb && UserOffset%4 !=0) { UserOffset -= 2; + TotalAdj = 2; + } // CPEs will be rounded up to a multiple of 4. - if (isThumb && TrialOffset%4 != 0) + if (isThumb && TrialOffset%4 != 0) { TrialOffset += 2; + TotalAdj += 2; + } + + // In Thumb2 mode, later branch adjustments can shift instructions up and + // change their alignment. In the worst case this moves the user's + // effective address down by 2 and the CPE's address up by 2, so shrink + // MaxDisp by whatever part of those 4 bytes was not already applied. + if (isThumb2 && TotalAdj != 4) + MaxDisp -= (4 - TotalAdj); if (UserOffset <= TrialOffset) { // User before the Trial. 
@@ -1398,13 +1409,49 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { } } - MadeChange |= OptimizeThumb2JumpTables(MF); MadeChange |= OptimizeThumb2Branches(MF); + MadeChange |= OptimizeThumb2JumpTables(MF); return MadeChange; } bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { - return false; + bool MadeChange = false; + + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { + ImmBranch &Br = ImmBranches[i]; + unsigned Opcode = Br.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2B: + NewOpc = ARM::tB; + Bits = 11; + Scale = 2; + break; + case ARM::t2Bcc: + NewOpc = ARM::tBcc; + Bits = 8; + Scale = 2; + break; + } + if (!NewOpc) + continue; + + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + Br.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = Br.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2BrShrunk; + MadeChange = true; + } + } + + return MadeChange; } diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 3e4725f4ceb..48f3eee488c 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -250,7 +250,7 @@ let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in def tB : T1I<(outs), (ins brtarget:$target), IIC_Br, - "b $target", [(br bb:$target)]>; + "b.n $target", [(br bb:$target)]>; // Far jump let Defs = [LR] in @@ -268,7 +268,7 @@ let isBranch = 1, isTerminator = 1 in { // a two-value operand where a dag node expects two operands. 
:( let isBranch = 1, isTerminator = 1 in def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br, - "b$cc $target", + "b$cc.n $target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// @@ -310,9 +310,10 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad, "ldr", " $dst, $addr", []>; // Load tconstpool +// FIXME: Added .n suffix to workaround a Darwin assembler bug. let canFoldAsLoad = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoad, - "ldr", " $dst, $addr", + "ldr", ".n $dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. @@ -628,7 +629,7 @@ def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALU, - "adr$p.n $dst, #$label", []>; + "adr$p $dst, #$label", []>; def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, lane_cst:$id, pred:$p), diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll new file mode 100644 index 00000000000..1dcaac09fd8 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-branch.ll @@ -0,0 +1,61 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 -disable-arm-if-conversion | FileCheck %s + +define void @f1(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f1: +; CHECK bne LBB + %tmp = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + store i32 0, i32* %v + ret void + +return: ; preds = %entry + ret void +} + +define void @f2(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f2: +; CHECK bge LBB + %tmp = icmp slt i32 %a, %b ; [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + store i32 0, i32* %v + ret void + +return: ; preds = 
%entry + ret void +} + +define void @f3(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f3: +; CHECK bhs LBB + %tmp = icmp ult i32 %a, %b ; [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + store i32 0, i32* %v + ret void + +return: ; preds = %entry + ret void +} + +define void @f4(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f4: +; CHECK blo LBB + %tmp = icmp ult i32 %a, %b ; [#uses=1] + br i1 %tmp, label %return, label %cond_true + +cond_true: ; preds = %entry + store i32 0, i32* %v + ret void + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll index 3396b0ba43f..7187ca13043 100644 --- a/test/CodeGen/Thumb2/tls2.ll +++ b/test/CodeGen/Thumb2/tls2.ll @@ -1,19 +1,29 @@ -; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \ -; RUN: grep {i(gottpoff)} -; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \ -; RUN: grep {ldr r., \[pc, r.\]} -; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \ -; RUN: -relocation-model=pic | grep {__tls_get_addr} +; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | FileCheck %s -check-prefix=CHECK-NOT-PIC +; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC @i = external thread_local global i32 ; [#uses=2] define i32 @f() { entry: +; CHECK-NOT-PIC: f: +; CHECK-NOT-PIC: add r0, pc +; CHECK-NOT-PIC: ldr r1, [r0] +; CHECK-NOT-PIC: i(gottpoff) + +; CHECK-PIC: f: +; CHECK-PIC: bl __tls_get_addr(PLT) %tmp1 = load i32* @i ; [#uses=1] ret i32 %tmp1 } define i32* @g() { entry: +; CHECK-NOT-PIC: g: +; CHECK-NOT-PIC: add r0, pc +; CHECK-NOT-PIC: ldr r1, [r0] +; CHECK-NOT-PIC: i(gottpoff) + +; CHECK-PIC: g: +; CHECK-PIC: bl __tls_get_addr(PLT) ret i32* @i }