From 10b5086e6e945b830ff909821240eff5c4a42bfc Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Thu, 5 Sep 2013 11:10:31 +0000 Subject: [PATCH] ARM: Add GPR register class excluding LR for use with the ADR instruction. This improves code generation for jump tables by avoiding the emission of "mov pc, lr" which could fool the processor into believing this is a return from a function causing mispredicts. The code generation logic for jump tables uses ADR to materialize the address of the jump target. Patch by Daniel Stewart! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/ARMRegisterInfo.td | 8 ++ lib/Target/ARM/Thumb2InstrInfo.cpp | 16 +++- .../CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll | 76 +++++++++++++++++++ 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5d0c48443ac..2f3fa63a41a 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1233,7 +1233,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in -def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), +def t2LEApcrelJT : t2PseudoInst<(outs jtGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 90c6a965acf..51ecaf7c465 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -240,6 +240,14 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { }]; } +// jtGPR - Jump Table General Purpose Registers. +// Used by the Thumb2 instructions to prevent Thumb2 jump tables +// from using the LR. 
The implementation of the jump table uses a mov pc, rA +// type instruction to jump into the table. Use of the LR register (as in +// mov pc, lr) can cause the ARM branch predictor to think it is returning +// from a function instead. This causes a mispredict and a pipeline flush. +def jtGPR : RegisterClass<"ARM", [i32], 32, (sub rGPR, LR)>; + // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 286eaa0946d..7c51c70f679 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -152,7 +152,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, + &ARM::rGPRRegClass, + ARM::gsub_1); + assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); + const TargetRegisterClass* ConstrainedClass = + MRI->constrainRegClass(SrcReg, TargetClass); + assert(ConstrainedClass && "Couldn't constrain the register class"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); @@ -193,7 +199,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). 
MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, + &ARM::rGPRRegClass, + ARM::gsub_1); + assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); + const TargetRegisterClass* ConstrainedClass = + MRI->constrainRegClass(DestReg, TargetClass); + assert(ConstrainedClass && "Couldn't constrain the register class"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); diff --git a/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll b/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll new file mode 100644 index 00000000000..9c543b52092 --- /dev/null +++ b/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @foo(i32 %n, i32* nocapture %inp) #0 { +; CHECK: foo +; CHECK-NOT: mov pc, lr +.split: + %0 = icmp sgt i32 %n, 1 + %1 = add nsw i32 %n, -1 + %loop_guard = icmp sgt i32 %1, 0 + %or.cond = and i1 %0, %loop_guard + br i1 %or.cond, label %stmt.preheader, label %loop_exit + +stmt.preheader: ; preds = %.split + %adjust_ub = add i32 %n, -2 + %scevgep6.gep = getelementptr i32* %inp, i32 1 + %2 = icmp sgt i32 %adjust_ub, 0 + %adjust_ub.op = add i32 %n, -1 + %3 = select i1 %2, i32 %adjust_ub.op, i32 1 + %xtraiter = and i32 %3, 3 + switch i32 %xtraiter, label %stmt.unr [ + i32 0, label %stmt. 
+ i32 1, label %stmt.unr30 + i32 2, label %stmt.unr16 + i32 3, label %stmt.unr8 + ] + +stmt.unr: ; preds = %stmt.preheader + %scevgep6.inc.unr = getelementptr i32* %inp, i32 2 + br label %stmt.unr8 + +stmt.unr8: ; preds = %stmt.preheader, %stmt.unr + %imax.03.reg2mem.0.unr = phi i32 [ 1, %stmt.unr ], [ 0, %stmt.preheader ] + %scevgep6.phi.unr = phi i32* [ %scevgep6.inc.unr, %stmt.unr ], [ %scevgep6.gep, %stmt.preheader ] + %scevgep6.inc.unr15 = getelementptr i32* %scevgep6.phi.unr, i32 1 + br label %stmt.unr16 + +stmt.unr16: ; preds = %stmt.preheader, %stmt.unr8 + %imax.03.reg2mem.0.unr17 = phi i32 [ 0, %stmt.unr8 ], [ 0, %stmt.preheader ] + %selv.lcssa.reg2mem.1.unr18 = phi i32 [ 0, %stmt.unr8 ], [ undef, %stmt.preheader ] + %scevgep6.phi.unr19 = phi i32* [ %scevgep6.inc.unr15, %stmt.unr8 ], [ %scevgep6.gep, %stmt.preheader ] + %indvar.unr20 = phi i32 [ 1, %stmt.unr8 ], [ 0, %stmt.preheader ] + %scevgep6.inc.unr27 = getelementptr i32* %scevgep6.phi.unr19, i32 1 + br label %stmt.unr30 + +stmt.unr30: ; preds = %stmt.preheader, %stmt.unr16 + %imax.03.reg2mem.0.unr31 = phi i32 [ 1, %stmt.unr16 ], [ 0, %stmt.preheader ] + %selv.lcssa.reg2mem.1.unr32 = phi i32 [ 0, %stmt.unr16 ], [ undef, %stmt.preheader ] + %scevgep6.phi.unr33 = phi i32* [ %scevgep6.inc.unr27, %stmt.unr16 ], [ %scevgep6.gep, %stmt.preheader ] + %indvar.unr34 = phi i32 [ 0, %stmt.unr16 ], [ 1, %stmt.preheader ] + %_p_scalar_.unr36 = load i32* %scevgep6.phi.unr33, align 4 + %p_.unr37 = icmp sgt i32 %_p_scalar_.unr36, %imax.03.reg2mem.0.unr31 + %scevgep6.inc.unr41 = getelementptr i32* %scevgep6.phi.unr33, i32 1 + %4 = icmp ugt i32 %3, 4 + br i1 %4, label %stmt., label %loop_exit + + +loop_exit: ; preds = %stmt.unr30, %stmt., %.split + %itemp.0.lcssa.reg2mem.0 = phi i32 [ undef, %.split ], [ 1, %stmt.unr30 ], [0, %stmt. ] + ret i32 %itemp.0.lcssa.reg2mem.0 + +stmt.: ; preds = %stmt.preheader, %stmt.unr30, %stmt. + %imax.03.reg2mem.0 = phi i32 [ %p_selv2.3, %stmt. 
], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ] + %selv.lcssa.reg2mem.1 = phi i32 [ 0, %stmt. ], [ 1, %stmt.unr30 ], [ undef, %stmt.preheader ] + %scevgep6.phi = phi i32* [ %scevgep6.inc.3, %stmt. ], [ %scevgep6.inc.unr41, %stmt.unr30 ], [ %scevgep6.gep, %stmt.preheader ] + %indvar = phi i32 [ %scevgep.sum.3, %stmt. ], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ] + %scevgep.sum = add i32 %indvar, 1 + %_p_scalar_ = load i32* %scevgep6.phi, align 4 + %p_ = icmp sgt i32 %_p_scalar_, %imax.03.reg2mem.0 + %p_selv = select i1 %p_, i32 %scevgep.sum, i32 %selv.lcssa.reg2mem.1 + %scevgep.sum.3 = add i32 %indvar, 4 + %p_selv2.3 = select i1 %p_, i32 %_p_scalar_, i32 %p_selv + %scevgep6.inc.3 = getelementptr i32* %scevgep6.phi, i32 4 + %loop_cond.4 = icmp slt i32 %scevgep.sum.3, %adjust_ub + br i1 %loop_cond.4, label %stmt., label %loop_exit +} +