ARM: Use ldrd/strd to spill 64-bit pairs when available.

This allows common sp-offsets to be part of the instruction and is
probably faster on modern CPUs too.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-04-21 11:57:07 +00:00
parent 7aad829243
commit 4cc1407b84
4 changed files with 133 additions and 50 deletions

View File

@ -747,10 +747,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
static const
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
unsigned Reg, unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) {
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
@ -795,12 +795,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
.addFrameIndex(FI).addMemOperand(MMO));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
@ -948,7 +958,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@ -975,12 +984,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
.addFrameIndex(FI).addMemOperand(MMO));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MachineInstrBuilder MIB;
if (Subtarget.hasV5TEOps()) {
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
} else {
// Fallback to LDM instruction, which has existed since the dawn of
// time.
MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
.addFrameIndex(FI).addMemOperand(MMO));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else

View File

@ -141,6 +141,10 @@ public:
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;

View File

@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
// Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
MachineRegisterInfo *MRI = &MF.getRegInfo();
MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
return;
}
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
// Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
MachineRegisterInfo *MRI = &MF.getRegInfo();
MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
AddDefaultPred(MIB);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
return;
}
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
NumBits = 8;
// MCInst operand has already scaled value.
Scale = 1;
if (Offset < 0) {
isSub = true;
Offset = -Offset;
}
} else {
llvm_unreachable("Unsupported addressing mode!");
}

View File

@ -1,29 +1,43 @@
; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD
; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=INSTRS-ARE-THUMB
define void @foo(i64* %addr) {
%val1 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val2 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val3 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val4 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val5 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val6 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val7 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
%val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
; Key point is that enough 64-bit paired GPR values are live that
; one of them has to be spilled. This used to cause an abort because
; an LDMIA was created with both a FrameIndex and an offset, which
; is not allowed.
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; We also want to ensure the register scavenger is working (i.e. an
; offset from sp can be generated), so we need two spills.
; CHECK: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
; CHECK: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; In principle LLVM may have to recalculate the offset. At the moment
; it reuses the original though.
; CHECK: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; Make sure we are actually creating the Thumb versions of the spill
; instructions.
; INSTRS-ARE-THUMB: t2STRDi8
; INSTRS-ARE-THUMB: t2LDRDi8
store volatile i64 %val1, i64* %addr
store volatile i64 %val2, i64* %addr