ARM: support interrupt attribute

This function-attribute modifies the callee-saved register list and function
epilogue (specifically the return instruction) so that a routine is suitable
for use as an interrupt-handler of the specified type without disrupting
user-mode applications.

rdar://problem/14207019

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191766 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-10-01 14:33:28 +00:00
parent 6d2158acae
commit bba9390fc6
9 changed files with 278 additions and 21 deletions

View File

@ -51,20 +51,34 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool ghcCall = false;
if (MF) {
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
}
if (ghcCall)
const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList
: CSR_AAPCS_SaveList;
if (!MF) return RegList;
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
else
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
} else if (F->hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
return CSR_AAPCS_SaveList;
} else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
return CSR_FIQ_SaveList;
} else {
// Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
// exception handling.
return CSR_GenericInt_SaveList;
}
}
return RegList;
}
const uint32_t*

View File

@ -207,4 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
(sub CSR_AAPCS_ThisReturn, R9))>;
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to
// our "caller" in the same state as we receive them.
// For most interrupts, all registers except SP and LR are shared with
// user-space. We mark LR to be saved anyway, since this is what the ARM backend
// generally does rather than tracking its liveness as a normal register.
def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
// The fast interrupt handlers have more private state and get their own copies
// of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and
// current frame lowering expects to encounter it while processing callee-saved
// registers.
def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;

View File

@ -962,6 +962,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
case ARM::SUBS_PC_LR: {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
.addReg(ARM::LR)
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
.addReg(ARM::CPSR, RegState::Undef);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =

View File

@ -175,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = CSI[i].getReg();
int FI = CSI[i].getFrameIdx();
switch (Reg) {
case ARM::R0:
case ARM::R1:
case ARM::R2:
case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@ -189,6 +193,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R9:
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
if (STI.isTargetIOS()) {
@ -373,7 +378,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
do
--MBBI;
@ -658,6 +663,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
RetOpcode == ARM::TCRETURNri);
bool isInterrupt =
RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@ -672,7 +679,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
STI.hasV5TOps()) {
Reg = ARM::PC;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
// Fold the return instruction into the LDM.
@ -1199,7 +1207,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@ -1226,6 +1234,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
case ARM::LR:
LRSpilled = true;
// Fallthrough
case ARM::R0: case ARM::R1:
case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
CS1Spilled = true;
@ -1240,6 +1250,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
switch (Reg) {
case ARM::R0: case ARM::R1:
case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
case ARM::LR:
@ -1295,8 +1307,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (!LRSpilled && CS1Spilled) {
MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
SmallVectorImpl<unsigned>::iterator LRPos;
LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
(unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
ForceLRSpill = false;
ExtraCSSpill = true;
}

View File

@ -1015,6 +1015,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
@ -1966,6 +1967,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isVarArg && !Outs.empty())
return false;
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
if (CallerF->hasFnAttribute("interrupt"))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
@ -2094,6 +2101,39 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
isVarArg));
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
SDLoc DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
// See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
// version of the "preferred return address". These offsets affect the return
// instruction if this is a return from PL1 without hypervisor extensions.
// IRQ/FIQ: +4 "subs pc, lr, #4"
// SWI: 0 "subs pc, lr, #0"
// ABORT: +4 "subs pc, lr, #4"
// UNDEF: +4/+2 "subs pc, lr, #0"
// UNDEF varies depending on where the exception came from ARM or Thumb
// mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
int64_t LROffset;
if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
IntKind == "ABORT")
LROffset = 4;
else if (IntKind == "SWI" || IntKind == "UNDEF")
LROffset = 0;
else
report_fatal_error("Unsupported interrupt attribute. If present, value "
"must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
RetOps.data(), RetOps.size());
}
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@ -2179,6 +2219,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
// CPUs which aren't M-class use a special sequence to return from
// exceptions (roughly, any instruction setting pc and cpsr simultaneously,
// though we use "subs pc, lr, #N").
//
// M-class CPUs actually use a normal return sequence with a special
// (hardware-provided) value in LR, so the normal code path works.
if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
!Subtarget->isMClass()) {
if (Subtarget->isThumb1Only())
report_fatal_error("interrupt attribute is not supported in Thumb1");
return LowerInterruptReturn(RetOps, dl, DAG);
}
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
@ -2235,7 +2288,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ARMISD::RET_FLAG)
if (UI->getOpcode() != ARMISD::RET_FLAG &&
UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}

View File

@ -52,6 +52,7 @@ namespace llvm {
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.

View File

@ -121,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@ -1925,6 +1926,12 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
// Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets
// the user-space one).
def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
4, IIC_Br,
[(ARMintretflag imm:$offset)]>;
}
// Indirect branches

View File

@ -3758,9 +3758,12 @@ def t2RFEIA : T2RFE<0b111010011001,
[/* For disassembly only; pattern left blank */]>;
// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction.
let Defs = [PC], Uses = [LR] in
// Exception return instruction is "subs pc, lr, #imm".
let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
"subs", "\tpc, lr, $imm", []>, Requires<[IsThumb2]> {
"subs", "\tpc, lr, $imm",
[(ARMintretflag imm0_255:$imm)]>,
Requires<[IsThumb2]> {
let Inst{31-8} = 0b111100111101111010001111;
bits<8> imm;

View File

@ -0,0 +1,130 @@
; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s
; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s
; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s
declare arm_aapcscc void @bar()
@bigvar = global [16 x i32] zeroinitializer
define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Must save all registers except banked sp and lr (we save lr anyway because
; we actually need it at the end to execute the return ourselves).
; Also need special function return setting pc and CPSR simultaneously.
; CHECK-A-LABEL: irq_fn:
; CHECK-A: push {r0, r1, r2, r3, r11, lr}
; CHECK-A: add r11, sp, #16
; CHECK-A: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; CHECK-A: bl bar
; CHECK-A: sub sp, r11, #16
; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
; CHECK-A: subs pc, lr, #4
; CHECK-A-THUMB-LABEL: irq_fn:
; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr}
; CHECK-A-THUMB: mov r4, sp
; CHECK-A-THUMB: add r7, sp, #20
; CHECK-A-THUMB: bic r4, r4, #7
; CHECK-A-THUMB: bl bar
; CHECK-A-THUMB: sub.w r4, r7, #20
; CHECK-A-THUMB: mov sp, r4
; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr}
; CHECK-A-THUMB: subs pc, lr, #4
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
; appropriate sentinel so no special return needed).
; CHECK-M: push {r4, r7, lr}
; CHECK-M: add r7, sp, #4
; CHECK-M: sub sp, #4
; CHECK-M: mov r4, sp
; CHECK-M: mov sp, r4
; CHECK-M: blx _bar
; CHECK-M: subs r4, r7, #4
; CHECK-M: mov sp, r4
; CHECK-M: pop {r4, r7, pc}
call arm_aapcscc void @bar()
ret void
}
define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
; CHECK-A-LABEL: fiq_fn:
; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
; 32 to get past r0, r1, ..., r7
; CHECK-A: add r11, sp, #32
; CHECK-A: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
; 32 must match above
; CHECK-A: sub sp, r11, #32
; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
; CHECK-A: subs pc, lr, #4
%val = load volatile [16 x i32]* @bigvar
store volatile [16 x i32] %val, [16 x i32]* @bigvar
ret void
}
define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
; CHECK-A-LABEL: swi_fn:
; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-A: add r11, sp, #44
; CHECK-A: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
; CHECK-A: sub sp, r11, #44
; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-A: subs pc, lr, #0
%val = load volatile [16 x i32]* @bigvar
store volatile [16 x i32] %val, [16 x i32]* @bigvar
ret void
}
define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
; CHECK-A-LABEL: undef_fn:
; CHECK-A: push {r0, r1, r2, r3, r11, lr}
; CHECK-A: add r11, sp, #16
; CHECK-A: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
; CHECK-A: sub sp, r11, #16
; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
; CHECK-A: subs pc, lr, #0
call void @bar()
ret void
}
define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" {
; CHECK-A-LABEL: abort_fn:
; CHECK-A: push {r0, r1, r2, r3, r11, lr}
; CHECK-A: add r11, sp, #16
; CHECK-A: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
; CHECK-A: sub sp, r11, #16
; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
; CHECK-A: subs pc, lr, #4
call void @bar()
ret void
}
@var = global double 0.0
; We don't save VFP regs, since it would be a massive overhead in the general
; case.
define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" {
; CHECK-A-LABEL: floating_fn:
; CHECK-A-NOT: vpush
; CHECK-A-NOT: vstr
; CHECK-A-NOT: vstm
; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
%lhs = load volatile double* @var
%rhs = load volatile double* @var
%sum = fadd double %lhs, %rhs
store double %sum, double* @var
ret void
}