diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 40eeb2e6d0f..c79c3428b65 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -50,10 +50,12 @@ public: /// This should be called by the MC lowering code _immediately_ before /// lowering the MI to an MCInst. It records where the operands for the /// instruction are stored, and outputs a label to record the offset of - /// the call from the start of the text section. + /// the call from the start of the text section. In special cases (e.g. AnyReg + /// calling convention) the return register is also recorded if requested. void recordStackMap(const MachineInstr &MI, uint32_t ID, MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE); + MachineInstr::const_mop_iterator MOE, + bool recordResult = false); /// If there is any stack map data, create a stack map section and serialize /// the map info into it. This clears the stack map data structures diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 35c7df9b2ac..4437af25574 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -54,6 +54,10 @@ namespace CallingConv { // WebKit JS - Calling convention for stack based JavaScript calls WebKit_JS = 12, + // AnyReg - Calling convention for dynamic register based calls (e.g. + // stackmap and patchpoint intrinsics). + AnyReg = 13, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 0c0b1edfe9b..3f6eae6bb20 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -807,9 +807,9 @@ def STACKMAP : Instruction { let mayLoad = 1; } def PATCHPOINT : Instruction { - let OutOperandList = (outs); + let OutOperandList = (outs unknown:$dst); let InOperandList = (ins i32imm:$id, i32imm:$nbytes, unknown:$callee, - i32imm:$nargs, variable_ops); + i32imm:$nargs, i32imm:$cc, variable_ops); let isCall = 1; let mayLoad = 1; } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 434376008e9..4c9439a3ecf 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -558,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_64_sysvcc); KEYWORD(x86_64_win64cc); KEYWORD(webkit_jscc); + KEYWORD(anyregcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 079a532da63..ad2cbe39cdc 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1343,6 +1343,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'x86_64_sysvcc' /// ::= 'x86_64_win64cc' /// ::= 'webkit_jscc' +/// ::= 'anyregcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1366,6 +1367,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; + case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index e8389e4024b..086ea95273d 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -91,7 +91,7 @@ namespace lltok { kw_ptx_kernel, kw_ptx_device, kw_spir_kernel, kw_spir_func, 
kw_x86_64_sysvcc, kw_x86_64_win64cc, - kw_webkit_jscc, + kw_webkit_jscc, kw_anyregcc, // Attributes: kw_attributes, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e107276359b..1ca200dc8bd 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -211,6 +211,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +219,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -722,10 +727,16 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) + NumDefs = NumResults; + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -748,12 +759,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. @@ -784,8 +795,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d8a2cfb64a2..913f517d85e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6733,7 +6733,8 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
 /// intrinsic's operands need to participate in the calling convention.
 std::pair<SDValue, SDValue>
 SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
-                                       unsigned NumArgs, SDValue Callee) {
+                                       unsigned NumArgs, SDValue Callee,
+                                       bool useVoidTy) {
   TargetLowering::ArgListTy Args;
   Args.reserve(NumArgs);
@@ -6753,9 +6754,10 @@
     Args.push_back(Entry);
   }
 
-  TargetLowering::CallLoweringInfo CLI(getRoot(), CI.getType(),
-    /*retSExt*/ false, /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false,
-    NumArgs, CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
+  Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+  TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
+    /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
+    CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
     /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
 
   const TargetLowering *TLI = TM.getTargetLowering();
@@ -6824,32 +6826,38 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
 void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
   // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>,
-  //                                                 i32 <numBytes>,
-  //                                                 i8* <target>, i32 <numArgs>,
-  //                                                 [Args...], [live variables...])
+  //                                                 i32 <numBytes>,
+  //                                                 i8* <target>,
+  //                                                 i32 <numArgs>,
+  //                                                 [Args...],
+  //                                                 [live variables...])
+  unsigned CC = CI.getCallingConv();
+  bool isAnyRegCC = CC == CallingConv::AnyReg;
+  bool hasDef = !CI.getType()->isVoidTy();
   SDValue Callee = getValue(CI.getOperand(2)); // <target>
 
   // Get the real number of arguments participating in the call <numArgs>
   unsigned NumArgs =
-    cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
+      cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
 
   // Skip the four meta args: <id>, <numBytes>, <target>, <numArgs>
   assert(CI.getNumArgOperands() >= NumArgs + 4 &&
          "Not enough arguments provided to the patchpoint intrinsic");
 
+  // For AnyRegCC the arguments are lowered later on manually.
+  unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
   std::pair<SDValue, SDValue> Result =
-    LowerCallOperands(CI, 4, NumArgs, Callee);
+    LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC);
 
+  // Set the root to the target-lowered call chain.
   SDValue Chain = Result.second;
   DAG.setRoot(Chain);
 
   SDNode *CallEnd = Chain.getNode();
-  if (!CI.getType()->isVoidTy()) {
-    setValue(&CI, Result.first);
-    if (CallEnd->getOpcode() == ISD::CopyFromReg)
-      CallEnd = CallEnd->getOperand(0).getNode();
-  }
+  if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+    CallEnd = CallEnd->getOperand(0).getNode();
 
   /// Get a call instruction from the call sequence chain.
   /// Tail calls are not allowed.
   assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
@@ -6870,10 +6878,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
   Ops.push_back(
     DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue()));
 
-  // Adjust <numArgs> to account for any stack arguments.
+  // Adjust <numArgs> to account for any arguments that have been passed on the
+  // stack instead.
   // Call Node: Chain, Target, {Args}, RegMask, [Glue]
-  unsigned NumCallArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
-  Ops.push_back(DAG.getTargetConstant(NumCallArgs, MVT::i32));
+  unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
+  NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+  // Add the calling convention.
+  Ops.push_back(DAG.getTargetConstant(CC, MVT::i32));
+
+  // Add the arguments we omitted previously. The register allocator should
+  // place these in any free register.
+  if (isAnyRegCC)
+    for (unsigned i = 4, e = NumArgs + 4; i != e; ++i)
+      Ops.push_back(getValue(CI.getArgOperand(i)));
 
   // Push the arguments from the call instruction.
   SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
@@ -6906,21 +6925,43 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
   if (hasGlue)
     Ops.push_back(*(Call->op_end()-1));
 
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDVTList NodeTys;
+  if (isAnyRegCC && hasDef) {
+    // Create the return types based on the intrinsic definition.
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    SmallVector<EVT, 3> ValueVTs;
+    ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+    assert(ValueVTs.size() == 1 && "Expected only one return value type.");
 
-  // Replace the target specific call node with a STACKMAP node.
+    // There is always a chain and a glue type at the end.
+    ValueVTs.push_back(MVT::Other);
+    ValueVTs.push_back(MVT::Glue);
+    NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+  } else
+    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  // Replace the target specific call node with a PATCHPOINT node.
   MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
                                          getCurSDLoc(), NodeTys, Ops);
 
-  // PatchPoint generates no value, so nothing goes in the NodeMap.
-  //
-  // FIXME: with anyregcc calling convention it will need to be in the NodeMap
-  // and replace values.
+  // Update the NodeMap.
+  if (hasDef) {
+    if (isAnyRegCC)
+      setValue(&CI, SDValue(MN, 0));
+    else
+      setValue(&CI, Result.first);
+  }
 
   // Fixup the consumers of the intrinsic. The chain and glue may be used in the
-  // call sequence.
-  DAG.ReplaceAllUsesWith(Call, MN);
-
+  // call sequence. Furthermore the location of the chain and glue can change
+  // when the AnyReg calling convention is used and the intrinsic returns a
+  // value.
+  if (isAnyRegCC && hasDef) {
+    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+  } else
+    DAG.ReplaceAllUsesWith(Call, MN);
   DAG.DeleteNode(Call);
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 6ecf5a0abbb..570f927069d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -622,7 +622,8 @@ public:
   std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
                                                 unsigned ArgIdx,
                                                 unsigned NumArgs,
-                                                SDValue Callee);
+                                                SDValue Callee,
+                                                bool useVoidTy = false);
 
   /// UpdateSplitBlock - When an MBB was split during scheduling, update the
   /// references that need to refer to the last resulting block.
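For orientation between the two halves of this patch: the PATCHPOINT machine node assembled above carries its operands in the order that the X86 folding and MC lowering below index into. A sketch of that layout, reconstructed from the lowering code in this patch (the optional def is present only for anyregcc patchpoints that return a value):

    PATCHPOINT [<def>,] <id>, <numBytes>, <callee>, <numCallRegArgs>, <cc>,
               <call args...>, <live variables...> [, <glue>]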
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 51580b479d8..89da78294fc 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -30,7 +30,8 @@ using namespace llvm;
 
 void StackMaps::recordStackMap(const MachineInstr &MI, uint32_t ID,
                                MachineInstr::const_mop_iterator MOI,
-                               MachineInstr::const_mop_iterator MOE) {
+                               MachineInstr::const_mop_iterator MOE,
+                               bool recordResult) {
   MCContext &OutContext = AP.OutStreamer.getContext();
   MCSymbol *MILabel = OutContext.CreateTempSymbol();
@@ -38,6 +39,16 @@ void StackMaps::recordStackMap(const MachineInstr &MI, uint32_t ID,
 
   LocationVec CallsiteLocs;
 
+  if (recordResult) {
+    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+      OpParser(MI.operands_begin(), llvm::next(MI.operands_begin(), 1));
+
+    Location &Loc = ParseResult.first;
+    assert(Loc.LocType == Location::Register &&
+           "Stackmap return location must be a register.");
+    CallsiteLocs.push_back(Loc);
+  }
+
   while (MOI != MOE) {
     std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
       OpParser(MOI, MOE);
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
new file mode 100644
index 00000000000..e76f9fda2db
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.h
@@ -0,0 +1,35 @@
+//=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the X86 Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86CALLINGCONV_H
+#define X86CALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
+                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the "
+                   "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the X86 C calling convention on Release builds.
+  return false;
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index f5c8d9fda43..a78b5c0a796 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -160,6 +160,17 @@ def RetCC_X86_64_WebKit_JS : CallingConv<[
   CCIfType<[i64], CCAssignToReg<[RAX]>>
 ]>;
 
+// X86-64 AnyReg return-value convention. No explicit register is specified
+// for the return value. The register allocator is allowed and expected to
+// choose any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the X86 C calling convention.
+def RetCC_X86_64_AnyReg : CallingConv<[
+  CCCustom<"CC_X86_AnyReg_Error">
+]>;
+
 // This is the root return-value convention for the X86-32 backend.
 def RetCC_X86_32 : CallingConv<[
   // If FastCC, use RetCC_X86_32_Fast.
@@ -178,6 +189,7 @@ def RetCC_X86_64 : CallingConv<[
 
   // Handle JavaScript calls.
CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, + CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, // Handle explicit CC selection CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, @@ -350,6 +362,16 @@ def CC_X86_64_WebKit_JS : CallingConv<[ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; +// No explicit register is specified for the AnyReg calling convention. The +// register allocator may assign the arguments to any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the X86 C calling convention. +def CC_X86_64_AnyReg : CallingConv<[ + CCCustom<"CC_X86_AnyReg_Error"> +]>; + //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// @@ -542,6 +564,7 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, + CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7984e76edd6..928dea91b4f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86CallingConv.h" #include "X86ISelLowering.h" #include "X86InstrBuilder.h" #include "X86RegisterInfo.h" diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 86ad2621fb4..55bfab449a3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,6 +16,7 @@ #include "X86ISelLowering.h" #include "Utils/X86ShuffleDecode.h" #include "X86.h" +#include "X86CallingConv.h" #include "X86InstrBuilder.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 369b031113e..b81b244828b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4198,14 +4198,20 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, const SmallVectorImpl &Ops, int FrameIndex, const TargetInstrInfo &TII) { + bool hasDef = MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit(); + unsigned StartIdx = hasDef ? 1 : 0; + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); bool isPatchPoint = MI->getOpcode() == TargetOpcode::PATCHPOINT; - unsigned StartIdx = isPatchPoint ? MI->getOperand(3).getImm() + 4 : 2; + StartIdx = isPatchPoint ? 
+ StartIdx + MI->getOperand(StartIdx+3).getImm() + 5 : + StartIdx + 2; - // No need to fold the meta data and function arguments + // No need to fold return, the meta data, and function arguments for (unsigned i = 0; i < StartIdx; ++i) MIB.addOperand(MI->getOperand(i)); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 92a111843f6..fa15114f914 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -780,26 +780,45 @@ static void LowerSTACKMAP(MCStreamer &OutStreamer, static void LowerPATCHPOINT(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, StackMaps &SM, - const MachineInstr &MI) -{ - int64_t ID = MI.getOperand(0).getImm(); + const MachineInstr &MI) { + bool hasDef = MI.getOperand(0).isReg() && MI.getOperand(0).isDef() && + !MI.getOperand(0).isImplicit(); + unsigned StartIdx = hasDef ? 1 : 0; +#ifndef NDEBUG + unsigned StartIdx2 = 0, e = MI.getNumOperands(); + while (StartIdx2 < e && MI.getOperand(StartIdx2).isReg() && + MI.getOperand(StartIdx2).isDef() && + !MI.getOperand(StartIdx2).isImplicit()) + ++StartIdx2; + + assert(StartIdx == StartIdx2 && + "Unexpected additonal definition in Patchpoint intrinsic."); +#endif + + int64_t ID = MI.getOperand(StartIdx).getImm(); assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); // Get the number of arguments participating in the call. This number was // adjusted during call lowering by subtracting stack args. - int64_t StackMapIdx = MI.getOperand(3).getImm() + 4; - assert(StackMapIdx <= MI.getNumOperands() && "Patchpoint dropped args."); + bool isAnyRegCC = MI.getOperand(StartIdx + 4).getImm() == CallingConv::AnyReg; + assert(((hasDef && isAnyRegCC) || !hasDef) && + "Only Patchpoints with AnyReg calling convention may have a result"); + int64_t StackMapIdx = isAnyRegCC ? StartIdx + 5 : + StartIdx + 5 + MI.getOperand(StartIdx + 3).getImm(); + assert(StackMapIdx <= MI.getNumOperands() && + "Patchpoint intrinsic dropped arguments."); SM.recordStackMap(MI, ID, llvm::next(MI.operands_begin(), StackMapIdx), - getStackMapEndMOP(MI.operands_begin(), MI.operands_end())); + getStackMapEndMOP(MI.operands_begin(), MI.operands_end()), + isAnyRegCC && hasDef); // Emit call. We need to know how many bytes we encoded here. unsigned EncodedBytes = 2; OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r) - .addReg(MI.getOperand(2).getReg())); + .addReg(MI.getOperand(StartIdx + 2).getReg())); // Emit padding. 
-  unsigned NumNOPBytes = MI.getOperand(1).getImm();
+  unsigned NumNOPBytes = MI.getOperand(StartIdx + 1).getImm();
   assert(NumNOPBytes >= EncodedBytes &&
          "Patchpoint can't request size less than the length of a call.");
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0cb9ac38bce..75987157dc1 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -239,6 +239,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   case CallingConv::HiPE:
     return CSR_NoRegs_SaveList;
 
+  case CallingConv::WebKit_JS:
+  case CallingConv::AnyReg:
+    return CSR_MostRegs_64_SaveList;
+
   case CallingConv::Intel_OCL_BI: {
     bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
     bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
@@ -296,6 +300,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
   }
   if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
     return CSR_NoRegs_RegMask;
+  if (CC == CallingConv::WebKit_JS || CC == CallingConv::AnyReg)
+    return CSR_MostRegs_64_RegMask;
   if (!Is64Bit)
     return CSR_32_RegMask;
   if (CC == CallingConv::Cold)
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
new file mode 100644
index 00000000000..710b6a088fa
--- /dev/null
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -0,0 +1,289 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Stackmap Header: no constants - 6 callsites
+; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT: .long 0
+; Num Constants
+; CHECK-NEXT: .long 0
+; Num Callsites
+; CHECK-NEXT: .long 6
+
+; test
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long L{{.*}}-_test
+; CHECK-NEXT: .short 0
+; 3 locations
+; CHECK-NEXT: .short 3
+; Loc 0: Register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long 0
+; Loc 1: Register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long 0
+; Loc 2: Constant 3
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .long 3
+define i64 @test() nounwind ssp uwtable {
+entry:
+  call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 12, i8* null, i32 2, i32 1, i32 2, i64 3)
+  ret i64 0
+}
+
+; property access 1 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long L{{.*}}-_property_access1
+; CHECK-NEXT: .short 0
+; 2 locations
+; CHECK-NEXT: .short 2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long 0
+; Loc 1: Register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long 0
+define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 12, i8* %f, i32 1, i8* %obj)
+  ret i64 %ret
+}
+
+; property access 2 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long L{{.*}}-_property_access2
+; CHECK-NEXT: .short 0
+; 2 locations
+; CHECK-NEXT: .short 2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long 0
+; Loc 1: Register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short
{{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access2() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 12, i8* %f, i32 1, i64* %obj) + ret i64 %ret +} + +; property access 3 - %obj is a frame index +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long L{{.*}}-_property_access3 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register <-- this will be folded once folding for FI is implemented +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access3() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 12, i8* %f, i32 0, i64* %obj) + ret i64 %ret +} + +; anyreg_test1 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long L{{.*}}-_anyreg_test1 +; CHECK-NEXT: .short 0 +; 15 locations +; CHECK-NEXT: .short 15 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 14: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 12, i8* %f, i32 14, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* 
%a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) + ret i64 %ret +} + +; anyreg_test2 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long L{{.*}}-_anyreg_test2 +; CHECK-NEXT: .short 0 +; 15 locations +; CHECK-NEXT: .short 15 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 14: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 12, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) + ret i64 %ret +} + +declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) +
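
To close with a usage sketch grounded in the tests above (the function name @example and the ID 99 are made up for illustration): under anyregcc the result of a patchpoint travels in whichever free register the allocator picked, and LowerPATCHPOINT records that register as location 0 of the callsite, ahead of the live-variable locations.

    define i64 @example(i8* %obj) {
    entry:
      %f = inttoptr i64 12297829382473034410 to i8*
      ; One call argument (%obj); the return register becomes Loc 0 of the record.
      %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 99, i32 12, i8* %f, i32 1, i8* %obj)
      ret i64 %ret
    }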