From 1f595bb42950088ccb8246e6b065a96027b46ec6 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 17 Apr 2009 19:07:39 +0000 Subject: [PATCH] Use CallConvLower.h and TableGen descriptions of the calling conventions for ARM. Patch by Sandeep Patel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69371 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/CallingConvLower.h | 40 +- include/llvm/Target/TargetCallingConv.td | 11 + lib/Target/ARM/ARM.td | 2 + lib/Target/ARM/ARMCallingConv.td | 84 +++ lib/Target/ARM/ARMISelLowering.cpp | 777 +++++++++++++--------- lib/Target/ARM/ARMISelLowering.h | 8 + lib/Target/ARM/CMakeLists.txt | 1 + lib/Target/ARM/Makefile | 2 +- test/CodeGen/ARM/arguments2.ll | 9 + test/CodeGen/ARM/arguments3.ll | 9 + test/CodeGen/ARM/arguments4.ll | 9 + test/CodeGen/ARM/arguments5.ll | 9 + test/CodeGen/ARM/arguments6.ll | 9 + test/CodeGen/ARM/arguments7.ll | 9 + test/CodeGen/ARM/arguments8.ll | 9 + test/CodeGen/ARM/formal.ll | 8 + test/CodeGen/ARM/ret_f32_arg2.ll | 6 + test/CodeGen/ARM/ret_f32_arg5.ll | 6 + test/CodeGen/ARM/ret_f64_arg2.ll | 6 + test/CodeGen/ARM/ret_f64_arg_reg_split.ll | 6 + test/CodeGen/ARM/ret_f64_arg_split.ll | 6 + test/CodeGen/ARM/ret_f64_arg_stack.ll | 6 + test/CodeGen/ARM/ret_i128_arg2.ii | 6 + test/CodeGen/ARM/ret_i64_arg2.ll | 6 + test/CodeGen/ARM/ret_i64_arg3.ll | 6 + test/CodeGen/ARM/ret_i64_arg_split.ll | 6 + utils/TableGen/CallingConvEmitter.cpp | 9 + 27 files changed, 752 insertions(+), 313 deletions(-) create mode 100644 lib/Target/ARM/ARMCallingConv.td create mode 100644 test/CodeGen/ARM/arguments2.ll create mode 100644 test/CodeGen/ARM/arguments3.ll create mode 100644 test/CodeGen/ARM/arguments4.ll create mode 100644 test/CodeGen/ARM/arguments5.ll create mode 100644 test/CodeGen/ARM/arguments6.ll create mode 100644 test/CodeGen/ARM/arguments7.ll create mode 100644 test/CodeGen/ARM/arguments8.ll create mode 100644 test/CodeGen/ARM/formal.ll create mode 100644 test/CodeGen/ARM/ret_f32_arg2.ll create mode 100644 test/CodeGen/ARM/ret_f32_arg5.ll create mode 100644 test/CodeGen/ARM/ret_f64_arg2.ll create mode 100644 test/CodeGen/ARM/ret_f64_arg_reg_split.ll create mode 100644 test/CodeGen/ARM/ret_f64_arg_split.ll create mode 100644 test/CodeGen/ARM/ret_f64_arg_stack.ll create mode 100644 test/CodeGen/ARM/ret_i128_arg2.ii create mode 100644 test/CodeGen/ARM/ret_i64_arg2.ll create mode 100644 test/CodeGen/ARM/ret_i64_arg3.ll create mode 100644 test/CodeGen/ARM/ret_i64_arg_split.ll diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 2a57cc183d2..4b7e54cc460 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -32,7 +32,8 @@ public: Full, // The value fills the full location. SExt, // The value is sign extended in the location. ZExt, // The value is zero extended in the location. - AExt // The value is extended with undefined upper bits. + AExt, // The value is extended with undefined upper bits. + BCvt // The value is bit-converted in the location // TODO: a subset of the value is in the location. }; private: @@ -45,8 +46,11 @@ private: /// isMem - True if this is a memory loc, false if it is a register loc. bool isMem : 1; + /// isCustom - True if this arg/retval requires special handling + bool isCustom : 1; + /// Information about how the value is assigned. - LocInfo HTP : 7; + LocInfo HTP : 6; /// ValVT - The type of the value being assigned. MVT ValVT; @@ -62,11 +66,22 @@ public: Ret.ValNo = ValNo; Ret.Loc = RegNo; Ret.isMem = false; + Ret.isCustom = false; Ret.HTP = HTP; Ret.ValVT = ValVT; Ret.LocVT = LocVT; return Ret; } + + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, + unsigned RegNo, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP); + Ret.isCustom = true; + return Ret; + } + static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP) { @@ -74,18 +89,30 @@ public: Ret.ValNo = ValNo; Ret.Loc = Offset; Ret.isMem = true; + Ret.isCustom = false; Ret.HTP = HTP; Ret.ValVT = ValVT; Ret.LocVT = LocVT; return Ret; } + static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, + unsigned Offset, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP); + Ret.isCustom = true; + return Ret; + } + unsigned getValNo() const { return ValNo; } MVT getValVT() const { return ValVT; } bool isRegLoc() const { return !isMem; } bool isMemLoc() const { return isMem; } + bool needsCustom() const { return isCustom; } + unsigned getLocReg() const { assert(isRegLoc()); return Loc; } unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; } MVT getLocVT() const { return LocVT; } @@ -93,14 +120,19 @@ public: LocInfo getLocInfo() const { return HTP; } }; - /// CCAssignFn - This function assigns a location for Val, updating State to /// reflect the change. typedef bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); - +/// CCCustomFn - This function assigns a location for Val, possibly updating +/// all args to reflect changes and indicates if it handled it. It must set +/// isCustom if it handles the arg and returns true. +typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State); + /// CCState - This class holds information needed while lowering arguments and /// return values. It captures which registers are already assigned and which /// stack slots are used. It provides accessors to allocate these values. diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td index 908e16ed5e7..224c08e176c 100644 --- a/include/llvm/Target/TargetCallingConv.td +++ b/include/llvm/Target/TargetCallingConv.td @@ -15,6 +15,11 @@ class CCAction; class CallingConv; +/// CCCustom - Calls a custom arg handling function. +class CCCustom : CCAction { + string FuncName = fn; +} + /// CCPredicateAction - Instances of this class check some predicate, then /// delegate to another action if the predicate is true. class CCPredicateAction : CCAction { @@ -90,6 +95,12 @@ class CCPromoteToType : CCAction { ValueType DestTy = destTy; } +/// CCBitConvertToType - If applied, this bitconverts the specified current +/// value to the specified type. +class CCBitConvertToType : CCAction { + ValueType DestTy = destTy; +} + /// CCDelegateTo - This action invokes the specified sub-calling-convention. It /// is successful if the specified CC matches. class CCDelegateTo : CCAction { diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index aca868fd763..3cdbb13f436 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -90,6 +90,8 @@ def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; include "ARMRegisterInfo.td" +include "ARMCallingConv.td" + //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td new file mode 100644 index 00000000000..f94b65ee078 --- /dev/null +++ b/lib/Target/ARM/ARMCallingConv.td @@ -0,0 +1,84 @@ +//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for ARM architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget: + CCIf().", F), A>; + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign: + CCIf; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_APCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType>, + + // f64 is passed in pairs of GPRs, possibly split onto the stack + CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3]>>, + + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 4>> +]>; + +def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_AAPCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType>, + + // i64/f64 is passed in even pairs of GPRs + // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register + CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, + CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3]>>, + + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM Calling Convention Dispatch +//===----------------------------------------------------------------------===// + +def CC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo>, + CCDelegateTo +]>; + +def RetCC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo>, + CCDelegateTo +]>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d018796a4b4..ab33ef47559 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -22,20 +22,44 @@ #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Instruction.h" #include "llvm/Intrinsics.h" #include "llvm/GlobalValue.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/MathExtras.h" using namespace llvm; +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM), ARMPCLabelIndex(0) { Subtarget = &TM.getSubtarget(); @@ -361,38 +385,208 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, return Invert; } -static void -HowToPassArgument(MVT ObjectVT, unsigned NumGPRs, - unsigned StackOffset, unsigned &NeededGPRs, - unsigned &NeededStackSize, unsigned &GPRPad, - unsigned &StackPad, ISD::ArgFlagsTy Flags) { - NeededStackSize = 0; - NeededGPRs = 0; - StackPad = 0; - GPRPad = 0; - unsigned align = Flags.getOrigAlign(); - GPRPad = NumGPRs % ((align + 3)/4); - StackPad = StackOffset % align; - unsigned firstGPR = NumGPRs + GPRPad; - switch (ObjectVT.getSimpleVT()) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i32: - case MVT::f32: - if (firstGPR < 4) - NeededGPRs = 1; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +// APCS f64 is in register pairs, possibly split to stack +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const unsigned LoRegList[] = { ARM::R1, + ARM::R2, + ARM::R3, + ARM::NoRegister }; + + if (unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4)) { + unsigned i; + for (i = 0; i < 4; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + MVT::i32, LocInfo)); + if (LoRegList[i] != ARM::NoRegister) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); else - NeededStackSize = 4; - break; - case MVT::i64: - case MVT::f64: - if (firstGPR < 3) - NeededGPRs = 2; - else if (firstGPR == 3) { - NeededGPRs = 1; - NeededStackSize = 4; - } else - NeededStackSize = 8; + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + MVT::i32, LocInfo)); + return true; // we handled it } + + return false; // we didn't handle it +} + +// AAPCS f64 is in aligned register pairs +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + if (unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2)) { + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it + } + + return false; // we didn't handle it +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + if (unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2)) { + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it + } + + return false; // we didn't handle it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +/// AddLiveIn - This helper function adds the specified physical register to the +/// MachineFunction as a live in value. It also creates a corresponding virtual +/// register for it. +static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + assert(RC->contains(PReg) && "Not the correct regclass!"); + unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. The returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *ARMTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + + DebugLoc dl = TheCall->getDebugLoc(); + // Assign locations to each value returned by this call. + SmallVector RVLocs; + bool isVarArg = TheCall->isVarArg(); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + + SmallVector ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + // handle f64 as custom + if (VA.needsCustom()) { + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Lo, dl, VA.getLocReg(), VA.getLocVT(), + Lo.getValue(2)); + ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, dl, VA.getValVT(), Lo, + Hi)); + } else { + Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag).getValue(1); + SDValue Val = Chain.getValue(0); + InFlag = Chain.getValue(2); + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), + Chain.getValue(0)); + break; + } + + ResultVals.push_back(Val); + } + } + + // Merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/false, NULL, 0, NULL, 0); +} + +/// LowerMemOpCallTo - Store the argument to the stack +SDValue +ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, + const CCValAssign &VA, + SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + } + return DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset); } /// LowerCALL - Lowering a ISD::CALL node into a callseq_start <- @@ -400,33 +594,22 @@ HowToPassArgument(MVT ObjectVT, unsigned NumGPRs, /// nodes. SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { CallSDNode *TheCall = cast(Op.getNode()); - MVT RetVT = TheCall->getRetValType(0); - SDValue Chain = TheCall->getChain(); - assert((TheCall->getCallingConv() == CallingConv::C || - TheCall->getCallingConv() == CallingConv::Fast) && - "unknown calling convention"); - SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); - DebugLoc dl = TheCall->getDebugLoc(); - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - unsigned NumGPRs = 0; // GPRs used for parameter passing. + MVT RetVT = TheCall->getRetValType(0); + SDValue Chain = TheCall->getChain(); + unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "unknown calling convention"); + bool isVarArg = TheCall->isVarArg(); + SDValue Callee = TheCall->getCallee(); + DebugLoc dl = TheCall->getDebugLoc(); - // Count how many bytes are to be pushed on the stack. - unsigned NumBytes = 0; + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); - // Add up all the space actually used. - for (unsigned i = 0; i < NumOps; ++i) { - unsigned ObjSize; - unsigned ObjGPRs; - unsigned StackPad; - unsigned GPRPad; - MVT ObjectVT = TheCall->getArg(i).getValueType(); - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - HowToPassArgument(ObjectVT, NumGPRs, NumBytes, ObjGPRs, ObjSize, - GPRPad, StackPad, Flags); - NumBytes += ObjSize + StackPad; - NumGPRs += ObjGPRs + GPRPad; - } + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass @@ -434,77 +617,64 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; - NumGPRs = 0; - std::vector > RegsToPass; - std::vector MemOpChains; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = TheCall->getArg(i); - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - MVT ArgVT = Arg.getValueType(); + // Walk the register/memloc assignments, inserting copies/loads. In the case + // of tail call optimization arguments are handle later. + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(realArgIdx); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx); - unsigned ObjSize; - unsigned ObjGPRs; - unsigned GPRPad; - unsigned StackPad; - HowToPassArgument(ArgVT, NumGPRs, ArgOffset, ObjGPRs, - ObjSize, GPRPad, StackPad, Flags); - NumGPRs += GPRPad; - ArgOffset += StackPad; - if (ObjGPRs > 0) { - switch (ArgVT.getSimpleVT()) { - default: assert(0 && "Unexpected ValueType for argument!"); - case MVT::i32: - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg)); - break; - case MVT::f32: - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg))); - break; - case MVT::i64: { - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(0, getPointerTy())); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(1, getPointerTy())); - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo)); - if (ObjGPRs == 2) - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi)); - else { - SDValue PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, NULL, 0)); - } - break; - } - case MVT::f64: { - SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), - &Arg, 1); - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt)); - if (ObjGPRs == 2) - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], - Cvt.getValue(1))); - else { - SDValue PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Cvt.getValue(1), PtrOff, - NULL, 0)); - } - break; - } - } - } else { - assert(ObjSize != 0); - SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; } - NumGPRs += ObjGPRs; - ArgOffset += ObjSize; + // f64 is passed in i32 pairs and must be combined + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), + fmrrd.getValue(1))); + else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, fmrrd.getValue(1), + Flags)); + } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } } if (!MemOpChains.empty()) @@ -610,107 +780,82 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { if (RetVT != MVT::Other) InFlag = Chain.getValue(1); - std::vector ResultVals; - - // If the call has results, copy the values out of the ret val registers. - switch (RetVT.getSimpleVT()) { - default: assert(0 && "Unexpected ret value!"); - case MVT::Other: - break; - case MVT::i32: - Chain = DAG.getCopyFromReg(Chain, dl, ARM::R0, - MVT::i32, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - if (TheCall->getNumRetVals() > 1 && - TheCall->getRetValType(1) == MVT::i32) { - // Returns a i64 value. - Chain = DAG.getCopyFromReg(Chain, dl, ARM::R1, MVT::i32, - Chain.getValue(2)).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - } - break; - case MVT::f32: - Chain = DAG.getCopyFromReg(Chain, dl, ARM::R0, - MVT::i32, InFlag).getValue(1); - ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, - Chain.getValue(0))); - break; - case MVT::f64: { - SDValue Lo = DAG.getCopyFromReg(Chain, dl, ARM::R0, MVT::i32, InFlag); - SDValue Hi = DAG.getCopyFromReg(Lo, dl, ARM::R1, MVT::i32, Lo.getValue(2)); - ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi)); - break; - } - } - - if (ResultVals.empty()) - return Chain; - - ResultVals.push_back(Chain); - SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl); - return Res.getValue(Op.getResNo()); + // Handle result values, copying them out of physregs into vregs that we + // return. + return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), + Op.getResNo()); } -static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { - SDValue Copy; +SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { + // The chain is always operand #0 SDValue Chain = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - switch(Op.getNumOperands()) { - default: - assert(0 && "Do not know how to return this many arguments!"); - abort(); - case 1: { - SDValue LR = DAG.getRegister(ARM::LR, MVT::i32); - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); - } - case 3: - Op = Op.getOperand(1); - if (Op.getValueType() == MVT::f32) { - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - } else if (Op.getValueType() == MVT::f64) { - // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is - // available. - Op = DAG.getNode(ARMISD::FMRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op,1); - SDValue Sign = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ISD::RET, dl, MVT::Other, Chain, Op, Sign, - Op.getValue(1), Sign); - } - Copy = DAG.getCopyToReg(Chain, dl, ARM::R0, Op, SDValue()); - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R0); - break; - case 5: - Copy = DAG.getCopyToReg(Chain, dl, ARM::R1, Op.getOperand(3), SDValue()); - Copy = DAG.getCopyToReg(Copy, dl, ARM::R0, Op.getOperand(1), - Copy.getValue(1)); - // If we haven't noted the R0+R1 are live out, do so now. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R0); - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R1); - } - break; - case 9: // i128 -> 4 regs - Copy = DAG.getCopyToReg(Chain, dl, ARM::R3, Op.getOperand(7), SDValue()); - Copy = DAG.getCopyToReg(Copy, dl, ARM::R2, Op.getOperand(5), - Copy.getValue(1)); - Copy = DAG.getCopyToReg(Copy, dl, ARM::R1, Op.getOperand(3), - Copy.getValue(1)); - Copy = DAG.getCopyToReg(Copy, dl, ARM::R0, Op.getOperand(1), - Copy.getValue(1)); - // If we haven't noted the R0+R1 are live out, do so now. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R0); - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R1); - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R2); - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R3); - } - break; + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + + // Analize return values of ISD::RET + CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - //We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Copy, Copy.getValue(1)); + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + // ISD::RET => ret chain, (regnum1,val1), ... + // So i*2+1 index only the regnums + SDValue Arg = Op.getOperand(realRVLocIdx*2+1); + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is + // available. + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + + // guarantee that all emitted copies are + // stuck together, avoiding something bad + Flag = Chain.getValue(1); + } + + SDValue result; + if (Flag.getNode()) + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else // Return Void + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + + return result; } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as @@ -933,123 +1078,139 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } -static SDValue LowerFORMAL_ARGUMENT(SDValue Op, SelectionDAG &DAG, - unsigned ArgNo, unsigned &NumGPRs, - unsigned &ArgOffset, DebugLoc dl) { - MachineFunction &MF = DAG.getMachineFunction(); - MVT ObjectVT = Op.getValue(ArgNo).getValueType(); - SDValue Root = Op.getOperand(0); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - ARMFunctionInfo *AFI = MF.getInfo(); - - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - unsigned ObjSize; - unsigned ObjGPRs; - unsigned GPRPad; - unsigned StackPad; - ISD::ArgFlagsTy Flags = - cast(Op.getOperand(ArgNo + 3))->getArgFlags(); - HowToPassArgument(ObjectVT, NumGPRs, ArgOffset, ObjGPRs, - ObjSize, GPRPad, StackPad, Flags); - NumGPRs += GPRPad; - ArgOffset += StackPad; - - SDValue ArgValue; - if (ObjGPRs == 1) { - unsigned VReg; - if (AFI->isThumbFunction()) - VReg = RegInfo.createVirtualRegister(ARM::tGPRRegisterClass); - else - VReg = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); - ArgValue = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); - if (ObjectVT == MVT::f32) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue); - } else if (ObjGPRs == 2) { - unsigned VReg; - if (AFI->isThumbFunction()) - VReg = RegInfo.createVirtualRegister(ARM::tGPRRegisterClass); - else - VReg = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); - ArgValue = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); - - if (AFI->isThumbFunction()) - VReg = RegInfo.createVirtualRegister(ARM::tGPRRegisterClass); - else - VReg = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs+1], VReg); - SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); - - assert(ObjectVT != MVT::i64 && "i64 should already be lowered"); - ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); - } - NumGPRs += ObjGPRs; - - if (ObjSize) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - if (ObjGPRs == 0) - ArgValue = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); - else { - SDValue ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); - assert(ObjectVT != MVT::i64 && "i64 should already be lowered"); - ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); - } - - ArgOffset += ObjSize; // Move on to the next argument. - } - - return ArgValue; -} - SDValue ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { - std::vector ArgValues; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SDValue Root = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - unsigned NumGPRs = 0; // GPRs used for parameter passing. - - unsigned NumArgs = Op.getNode()->getNumValues()-1; - for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) - ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, ArgNo, - NumGPRs, ArgOffset, dl)); - bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + ARMFunctionInfo *AFI = MF.getInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + + SmallVector ArgValues; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored on registers + if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + if (RegVT == MVT::f64) { + // f64 is passed in pairs of GPRs and must be combined + RegVT = MVT::i32; + } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) + assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments stored on + // physical registers into virtual ones + unsigned Reg = AddLiveIn(MF, VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + + // f64 is passed in i32 pairs and must be combined + if (VA.needsCustom()) { + SDValue ArgValue2; + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isMemLoc()) { + // must be APCS and older than V5T to split like this + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = AddLiveIn(MF, VA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + } + + ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, + ArgValue, ArgValue2); + } + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::ZExt: + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + } + + ArgValues.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); + + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // varargs if (isVarArg) { static const unsigned GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo(); + unsigned NumGPRs = CCInfo.getFirstUnallocated(GPRArgRegs, + sizeof(GPRArgRegs)/sizeof(GPRArgRegs[0])); + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned VARegSize = (4 - NumGPRs) * 4; unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + unsigned ArgOffset = 0; if (VARegSaveSize) { // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); + ArgOffset = CCInfo.getNextStackOffset(); VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); SmallVector MemOps; for (; NumGPRs < 4; ++NumGPRs) { - unsigned VReg; + TargetRegisterClass *RC; if (AFI->isThumbFunction()) - VReg = RegInfo.createVirtualRegister(ARM::tGPRRegisterClass); + RC = ARM::tGPRRegisterClass; else - VReg = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); + RC = ARM::GPRRegisterClass; + + unsigned VReg = AddLiveIn(MF, GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); @@ -1068,7 +1229,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()); + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); } /// isFloatingPointZero - Return true if this is +0.0. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c9ce57b38b9..cb07b1538cd 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -18,6 +18,7 @@ #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/CallingConvLower.h" #include namespace llvm { @@ -147,7 +148,14 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, const CCValAssign &VA, + SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags); + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG); SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 2b913539f68..2ac40f53549 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -8,6 +8,7 @@ tablegen(ARMGenInstrInfo.inc -gen-instr-desc) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) add_llvm_target(ARMCodeGen diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index b5d7172d88e..9a3b9be5b34 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc + ARMGenCodeEmitter.inc ARMGenCallingConv.inc DIRS = AsmPrinter diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll new file mode 100644 index 00000000000..eb7e45b4f36 --- /dev/null +++ b/test/CodeGen/ARM/arguments2.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define i32 @f(i32 %a, i128 %b) { + %tmp = call i32 @g(i128 %b) + ret i32 %tmp +} + +declare i32 @g(i128) diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll new file mode 100644 index 00000000000..97c040521d8 --- /dev/null +++ b/test/CodeGen/ARM/arguments3.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define i64 @f(i32 %a, i128 %b) { + %tmp = call i64 @g(i128 %b) + ret i64 %tmp +} + +declare i64 @g(i128) diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll new file mode 100644 index 00000000000..63ba64b27f1 --- /dev/null +++ b/test/CodeGen/ARM/arguments4.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define float @f(i32 %a, i128 %b) { + %tmp = call float @g(i128 %b) + ret float %tmp +} + +declare float @g(i128) diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll new file mode 100644 index 00000000000..2000ff7b4a8 --- /dev/null +++ b/test/CodeGen/ARM/arguments5.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define double @f(i32 %a, i128 %b) { + %tmp = call double @g(i128 %b) + ret double %tmp +} + +declare double @g(i128) diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll new file mode 100644 index 00000000000..a18c621d143 --- /dev/null +++ b/test/CodeGen/ARM/arguments6.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define i128 @f(i32 %a, i128 %b) { + %tmp = call i128 @g(i128 %b) + ret i128 %tmp +} + +declare i128 @g(i128) diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll new file mode 100644 index 00000000000..489ffd41604 --- /dev/null +++ b/test/CodeGen/ARM/arguments7.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) { + %tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) + ret double %tmp +} + +declare double @g(double) diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll new file mode 100644 index 00000000000..5ff7e09548e --- /dev/null +++ b/test/CodeGen/ARM/arguments8.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin + +define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) { + %tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) + ret i64 %tmp +} + +declare i64 @g(i64) diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll new file mode 100644 index 00000000000..6d6d108f328 --- /dev/null +++ b/test/CodeGen/ARM/formal.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +declare void @bar(i64 %x, i64 %y) + +define void @foo() { + call void @bar(i64 2, i64 3) + ret void +} diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll new file mode 100644 index 00000000000..287d92b9eb6 --- /dev/null +++ b/test/CodeGen/ARM/ret_f32_arg2.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define float @test_f32(float %a1, float %a2) { + ret float %a2 +} + diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll new file mode 100644 index 00000000000..3418be93e1e --- /dev/null +++ b/test/CodeGen/ARM/ret_f32_arg5.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) { + ret float %a5 +} + diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll new file mode 100644 index 00000000000..66848d5fb49 --- /dev/null +++ b/test/CodeGen/ARM/ret_f64_arg2.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define double @test_f64(double %a1, double %a2) { + ret double %a2 +} + diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll new file mode 100644 index 00000000000..626ee6fb137 --- /dev/null +++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mcpu=arm8 -mattr=+vfp2 + +define double @test_double_arg_reg_split(i32 %a1, double %a2) { + ret double %a2 +} + diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll new file mode 100644 index 00000000000..b03b604beee --- /dev/null +++ b/test/CodeGen/ARM/ret_f64_arg_split.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) { + ret double %a3 +} + diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll new file mode 100644 index 00000000000..ba3ec7fb751 --- /dev/null +++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) { + ret double %a4 +} + diff --git a/test/CodeGen/ARM/ret_i128_arg2.ii b/test/CodeGen/ARM/ret_i128_arg2.ii new file mode 100644 index 00000000000..0fe98e6b70f --- /dev/null +++ b/test/CodeGen/ARM/ret_i128_arg2.ii @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) { + ret i128 %a3 +} + diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll new file mode 100644 index 00000000000..b015a96e0bf --- /dev/null +++ b/test/CodeGen/ARM/ret_i64_arg2.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define i64 @test_i64(i64 %a1, i64 %a2) { + ret i64 %a2 +} + diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll new file mode 100644 index 00000000000..5dfecca319a --- /dev/null +++ b/test/CodeGen/ARM/ret_i64_arg3.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) { + ret i64 %a3 +} + diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll new file mode 100644 index 00000000000..5bd5cb2a230 --- /dev/null +++ b/test/CodeGen/ARM/ret_i64_arg_split.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 + +define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) { + ret i64 %a3 +} + diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index a9829851d71..211ded4d9d1 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -182,6 +182,10 @@ void CallingConvEmitter::EmitAction(Record *Action, << IndentStr << IndentStr << "LocInfo = CCValAssign::ZExt;\n" << IndentStr << "else\n" << IndentStr << IndentStr << "LocInfo = CCValAssign::AExt;\n"; + } else if (Action->isSubClassOf("CCBitConvertToType")) { + Record *DestTy = Action->getValueAsDef("DestTy"); + O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; + O << IndentStr << "LocInfo = CCValAssign::BCvt;\n"; } else if (Action->isSubClassOf("CCPassByVal")) { int Size = Action->getValueAsInt("Size"); int Align = Action->getValueAsInt("Align"); @@ -189,6 +193,11 @@ void CallingConvEmitter::EmitAction(Record *Action, << "State.HandleByVal(ValNo, ValVT, LocVT, LocInfo, " << Size << ", " << Align << ", ArgFlags);\n"; O << IndentStr << "return false;\n"; + } else if (Action->isSubClassOf("CCCustom")) { + O << IndentStr + << "if (" << Action->getValueAsString("FuncName") << "(ValNo, ValVT, " + << "LocVT, LocInfo, ArgFlags, State))\n"; + O << IndentStr << IndentStr << "return false;\n"; } else { Action->dump(); throw "Unknown CCAction!";