From bba9390fc6c0d536172c6bb4a9c93db557c1aff4 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 1 Oct 2013 14:33:28 +0000 Subject: [PATCH] ARM: support interrupt attribute This function-attribute modifies the callee-saved register list and function epilogue (specifically the return instruction) so that a routine is suitable for use as an interrupt-handler of the specified type without disrupting user-mode applications. rdar://problem/14207019 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191766 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 36 +++++-- lib/Target/ARM/ARMCallingConv.td | 22 +++- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 12 +++ lib/Target/ARM/ARMFrameLowering.cpp | 26 ++++- lib/Target/ARM/ARMISelLowering.cpp | 56 +++++++++- lib/Target/ARM/ARMISelLowering.h | 1 + lib/Target/ARM/ARMInstrInfo.td | 9 +- lib/Target/ARM/ARMInstrThumb2.td | 7 +- test/CodeGen/ARM/interrupt-attr.ll | 130 ++++++++++++++++++++++++ 9 files changed, 278 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/ARM/interrupt-attr.ll diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 58c06e392cf..ce89d7beb14 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -51,20 +51,34 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool ghcCall = false; - - if (MF) { - const Function *F = MF->getFunction(); - ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); - } - - if (ghcCall) + const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList + : CSR_AAPCS_SaveList; + + if (!MF) return RegList; + + const Function *F = MF->getFunction(); + if (F->getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; - else - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + } else if (F->hasFnAttribute("interrupt")) { + if (STI.isMClass()) { + // M-class CPUs have hardware which saves the registers needed to allow a + // function conforming to the AAPCS to function as a handler. + return CSR_AAPCS_SaveList; + } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + // Fast interrupt mode gives the handler a private copy of R8-R14, so less + // need to be saved to restore user-mode state. + return CSR_FIQ_SaveList; + } else { + // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by + // exception handling. + return CSR_GenericInt_SaveList; + } + } + + return RegList; } const uint32_t* diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 89c52239892..9bea4b2d68e 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -207,4 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, - (sub CSR_AAPCS_ThisReturn, R9))>; + (sub CSR_AAPCS_ThisReturn, R9))>; + +// The "interrupt" attribute is used to generate code that is acceptable in +// exception-handlers of various kinds. It makes us use a different return +// instruction (handled elsewhere) and affects which registers we must return to +// our "caller" in the same state as we receive them. + +// For most interrupts, all registers except SP and LR are shared with +// user-space. We mark LR to be saved anyway, since this is what the ARM backend +// generally does rather than tracking its liveness as a normal register. +def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; + +// The fast interrupt handlers have more private state and get their own copies +// of R8-R12, in addition to SP and LR. As before, mark LR for saving too. + +// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and +// current frame lowering expects to encounter it while processing callee-saved +// registers. +def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; + + diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2f7f1bfbf7c..109135e9009 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -962,6 +962,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandMOV32BitImm(MBB, MBBI); return true; + case ARM::SUBS_PC_LR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) + .addReg(ARM::LR) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addReg(ARM::CPSR, RegState::Undef); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index c8637be2bdd..7571c677b51 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -175,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: @@ -189,6 +193,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetIOS()) { @@ -373,7 +378,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { do --MBBI; @@ -658,6 +663,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); + bool isInterrupt = + RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; SmallVector Regs; unsigned i = CSI.size(); @@ -672,7 +679,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && + STI.hasV5TOps()) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. @@ -1199,7 +1207,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1226,6 +1234,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, case ARM::LR: LRSpilled = true; // Fallthrough + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: CS1Spilled = true; @@ -1240,6 +1250,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } switch (Reg) { + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: @@ -1295,8 +1307,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (!LRSpilled && CS1Spilled) { MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + SmallVectorImpl::iterator LRPos; + LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + (unsigned)ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + ForceLRSpill = false; ExtraCSSpill = true; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 96942ec8b7d..a2c6f0c3950 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1015,6 +1015,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; @@ -1966,6 +1967,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isVarArg && !Outs.empty()) return false; + // Exception-handling functions need a special set of instructions to indicate + // a return to the hardware. Tail-calling another function would probably + // break this. + if (CallerF->hasFnAttribute("interrupt")) + return false; + // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) @@ -2094,6 +2101,39 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, isVarArg)); } +static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, + SDLoc DL, SelectionDAG &DAG) { + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + + StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + + // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset + // version of the "preferred return address". These offsets affect the return + // instruction if this is a return from PL1 without hypervisor extensions. + // IRQ/FIQ: +4 "subs pc, lr, #4" + // SWI: 0 "subs pc, lr, #0" + // ABORT: +4 "subs pc, lr, #4" + // UNDEF: +4/+2 "subs pc, lr, #0" + // UNDEF varies depending on where the exception came from ARM or Thumb + // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. + + int64_t LROffset; + if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || + IntKind == "ABORT") + LROffset = 4; + else if (IntKind == "SWI" || IntKind == "UNDEF") + LROffset = 0; + else + report_fatal_error("Unsupported interrupt attribute. If present, value " + "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); + + RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); + + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, + RetOps.data(), RetOps.size()); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2179,6 +2219,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); + // CPUs which aren't M-class use a special sequence to return from + // exceptions (roughly, any instruction setting pc and cpsr simultaneously, + // though we use "subs pc, lr, #N"). + // + // M-class CPUs actually use a normal return sequence with a special + // (hardware-provided) value in LR, so the normal code path works. + if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + !Subtarget->isMClass()) { + if (Subtarget->isThumb1Only()) + report_fatal_error("interrupt attribute is not supported in Thumb1"); + return LowerInterruptReturn(RetOps, dl, DAG); + } + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps.data(), RetOps.size()); } @@ -2235,7 +2288,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() != ARMISD::RET_FLAG) + if (UI->getOpcode() != ARMISD::RET_FLAG && + UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 6131a262e1a..3c80334dc4a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -52,6 +52,7 @@ namespace llvm { BR_JT, // Jumptable branch. BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. + INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 740b71cb2ad..eddd9d176e7 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -121,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - +def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; @@ -1925,6 +1926,12 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001101000001111000000001110; } + + // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets + // the user-space one). + def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p), + 4, IIC_Br, + [(ARMintretflag imm:$offset)]>; } // Indirect branches diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 754970093f0..63f527e7758 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3758,9 +3758,12 @@ def t2RFEIA : T2RFE<0b111010011001, [/* For disassembly only; pattern left blank */]>; // B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction. -let Defs = [PC], Uses = [LR] in +// Exception return instruction is "subs pc, lr, #imm". +let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, - "subs", "\tpc, lr, $imm", []>, Requires<[IsThumb2]> { + "subs", "\tpc, lr, $imm", + [(ARMintretflag imm0_255:$imm)]>, + Requires<[IsThumb2]> { let Inst{31-8} = 0b111100111101111010001111; bits<8> imm; diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll new file mode 100644 index 00000000000..217fd696237 --- /dev/null +++ b/test/CodeGen/ARM/interrupt-attr.ll @@ -0,0 +1,130 @@ +; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s +; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s +; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s + +declare arm_aapcscc void @bar() + +@bigvar = global [16 x i32] zeroinitializer + +define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" { + ; Must save all registers except banked sp and lr (we save lr anyway because + ; we actually need it at the end to execute the return ourselves). + + ; Also need special function return setting pc and CPSR simultaneously. +; CHECK-A-LABEL: irq_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; CHECK-A: bl bar +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #4 + +; CHECK-A-THUMB-LABEL: irq_fn: +; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr} +; CHECK-A-THUMB: mov r4, sp +; CHECK-A-THUMB: add r7, sp, #20 +; CHECK-A-THUMB: bic r4, r4, #7 +; CHECK-A-THUMB: bl bar +; CHECK-A-THUMB: sub.w r4, r7, #20 +; CHECK-A-THUMB: mov sp, r4 +; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr} +; CHECK-A-THUMB: subs pc, lr, #4 + + ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to + ; appropriate sentinel so no special return needed). +; CHECK-M: push {r4, r7, lr} +; CHECK-M: add r7, sp, #4 +; CHECK-M: sub sp, #4 +; CHECK-M: mov r4, sp +; CHECK-M: mov sp, r4 +; CHECK-M: blx _bar +; CHECK-M: subs r4, r7, #4 +; CHECK-M: mov sp, r4 +; CHECK-M: pop {r4, r7, pc} + + call arm_aapcscc void @bar() + ret void +} + +define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { +; CHECK-A-LABEL: fiq_fn: +; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; 32 to get past r0, r1, ..., r7 +; CHECK-A: add r11, sp, #32 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] + ; 32 must match above +; CHECK-A: sub sp, r11, #32 +; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} +; CHECK-A: subs pc, lr, #4 + + %val = load volatile [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" { +; CHECK-A-LABEL: swi_fn: +; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: add r11, sp, #44 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] +; CHECK-A: sub sp, r11, #44 +; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: subs pc, lr, #0 + + %val = load volatile [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" { +; CHECK-A-LABEL: undef_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #0 + + call void @bar() + ret void +} + +define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" { +; CHECK-A-LABEL: abort_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #4 + + call void @bar() + ret void +} + +@var = global double 0.0 + +; We don't save VFP regs, since it would be a massive overhead in the general +; case. +define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" { +; CHECK-A-LABEL: floating_fn: +; CHECK-A-NOT: vpush +; CHECK-A-NOT: vstr +; CHECK-A-NOT: vstm +; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %lhs = load volatile double* @var + %rhs = load volatile double* @var + %sum = fadd double %lhs, %rhs + store double %sum, double* @var + ret void +}