From 623d2e618f4e672c47edff9ec63ed6d733ac81d3 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Fri, 8 Nov 2013 23:28:16 +0000 Subject: [PATCH] [Stackmap] Add AnyReg calling convention support for patchpoint intrinsic. The idea of the AnyReg Calling Convention is to provide the call arguments in registers, but not to force them to be placed in a particular order into a specified set of registers. Instead it is up to the register allocator to assign any register as it sees fit. The same applies to the return value (if applicable). Differential Revision: http://llvm-reviews.chandlerc.com/D2009 Reviewed by Andy git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194293 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/StackMaps.h | 6 +- include/llvm/IR/CallingConv.h | 4 + include/llvm/Target/Target.td | 4 +- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 2 + lib/AsmParser/LLToken.h | 2 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 25 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 93 ++++-- .../SelectionDAG/SelectionDAGBuilder.h | 3 +- lib/CodeGen/StackMaps.cpp | 13 +- lib/Target/X86/X86CallingConv.h | 35 +++ lib/Target/X86/X86CallingConv.td | 23 ++ lib/Target/X86/X86FastISel.cpp | 1 + lib/Target/X86/X86ISelLowering.cpp | 1 + lib/Target/X86/X86InstrInfo.cpp | 10 +- lib/Target/X86/X86MCInstLower.cpp | 35 ++- lib/Target/X86/X86RegisterInfo.cpp | 6 + test/CodeGen/X86/anyregcc.ll | 289 ++++++++++++++++++ 18 files changed, 503 insertions(+), 50 deletions(-) create mode 100644 lib/Target/X86/X86CallingConv.h create mode 100644 test/CodeGen/X86/anyregcc.ll diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 40eeb2e6d0f..c79c3428b65 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -50,10 +50,12 @@ public: /// This should be called by the MC lowering code _immediately_ before /// lowering the MI to an MCInst. 
It records where the operands for the /// instruction are stored, and outputs a label to record the offset of - /// the call from the start of the text section. + /// the call from the start of the text section. In special cases (e.g. AnyReg + /// calling convention) the return register is also recorded if requested. void recordStackMap(const MachineInstr &MI, uint32_t ID, MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE); + MachineInstr::const_mop_iterator MOE, + bool recordResult = false); /// If there is any stack map data, create a stack map section and serialize /// the map info into it. This clears the stack map data structures diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 35c7df9b2ac..4437af25574 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -54,6 +54,10 @@ namespace CallingConv { // WebKit JS - Calling convention for stack based JavaScript calls WebKit_JS = 12, + // AnyReg - Calling convention for dynamic register based calls (e.g. + // stackmap and patchpoint intrinsics). + AnyReg = 13, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
FirstTargetCC = 64, diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 0c0b1edfe9b..3f6eae6bb20 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -807,9 +807,9 @@ def STACKMAP : Instruction { let mayLoad = 1; } def PATCHPOINT : Instruction { - let OutOperandList = (outs); + let OutOperandList = (outs unknown:$dst); let InOperandList = (ins i32imm:$id, i32imm:$nbytes, unknown:$callee, - i32imm:$nargs, variable_ops); + i32imm:$nargs, i32imm:$cc, variable_ops); let isCall = 1; let mayLoad = 1; } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 434376008e9..4c9439a3ecf 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -558,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_64_sysvcc); KEYWORD(x86_64_win64cc); KEYWORD(webkit_jscc); + KEYWORD(anyregcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 079a532da63..ad2cbe39cdc 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1343,6 +1343,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'x86_64_sysvcc' /// ::= 'x86_64_win64cc' /// ::= 'webkit_jscc' +/// ::= 'anyregcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1366,6 +1367,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; + case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index e8389e4024b..086ea95273d 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -91,7 +91,7 @@ namespace lltok { kw_ptx_kernel, kw_ptx_device, kw_spir_kernel, kw_spir_func, 
kw_x86_64_sysvcc, kw_x86_64_win64cc, - kw_webkit_jscc, + kw_webkit_jscc, kw_anyregcc, // Attributes: kw_attributes, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e107276359b..1ca200dc8bd 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -211,6 +211,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +219,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -722,10 +727,16 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + + // Handle PATCHPOINT specially and then use the generic code. 
+ if (Opc == TargetOpcode::PATCHPOINT) + NumDefs = NumResults; + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -748,12 +759,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. @@ -784,8 +795,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d8a2cfb64a2..913f517d85e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6733,7 +6733,8 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// intrinsic's operands need to participate in the calling convention. std::pair SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee) { + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6753,9 +6754,10 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Args.push_back(Entry); } - TargetLowering::CallLoweringInfo CLI(getRoot(), CI.getType(), - /*retSExt*/ false, /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, - NumArgs, CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); const TargetLowering *TLI = TM.getTargetLowering(); @@ -6824,32 +6826,38 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // void|i64 @llvm.experimental.patchpoint.void|i64(i32 , - // i32 , - // i8* , i32 , - // [Args...], [live variables...]) + // i32 , + // i8* , + // i32 , + // [Args...], + // [live variables...]) + unsigned CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); SDValue Callee = getValue(CI.getOperand(2)); // // Get the real number of arguments participating in the call unsigned NumArgs = - cast(getValue(CI.getArgOperand(3)))->getZExtValue(); + cast(getValue(CI.getArgOperand(3)))->getZExtValue(); // Skip the four meta args: , , , assert(CI.getNumArgOperands() >= NumArgs + 4 && "Not enough arguments provided to the patchpoint intrinsic"); + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair Result = - LowerCallOperands(CI, 4, NumArgs, Callee); + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + // Set the root to the target-lowered call chain. SDValue Chain = Result.second; DAG.setRoot(Chain); SDNode *CallEnd = Chain.getNode(); - if (!CI.getType()->isVoidTy()) { - setValue(&CI, Result.first); - if (CallEnd->getOpcode() == ISD::CopyFromReg) - CallEnd = CallEnd->getOperand(0).getNode(); - } + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + /// Get a call instruction from the call sequence chain. /// Tail calls are not allowed. assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && @@ -6870,10 +6878,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { Ops.push_back( DAG.getIntPtrConstant(cast(Callee)->getZExtValue())); - // Adjust to account for any stack arguments. + // Adjust to account for any arguments that have been passed on the + // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] - unsigned NumCallArgs = Call->getNumOperands() - (hasGlue ? 
4 : 3); - Ops.push_back(DAG.getTargetConstant(NumCallArgs, MVT::i32)); + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant(CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); // Push the arguments from the call instruction. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; @@ -6906,21 +6925,43 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { if (hasGlue) Ops.push_back(*(Call->op_end()-1)); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); - // Replace the target specific call node with a STACKMAP node. + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, getCurSDLoc(), NodeTys, Ops); - // PatchPoint generates no value, so nothing goes in the NodeMap. - // - // FIXME: with anyregcc calling convention it will need to be in the NodeMap - // and replace values. + // Update the NodeMap. 
+ if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); - + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 6ecf5a0abbb..570f927069d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -622,7 +622,8 @@ public: std::pair LowerCallOperands(const CallInst &CI, unsigned ArgIdx, unsigned NumArgs, - SDValue Callee); + SDValue Callee, + bool useVoidTy = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that ned to refer to the last resulting block. 
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 51580b479d8..89da78294fc 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -30,7 +30,8 @@ using namespace llvm; void StackMaps::recordStackMap(const MachineInstr &MI, uint32_t ID, MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE) { + MachineInstr::const_mop_iterator MOE, + bool recordResult) { MCContext &OutContext = AP.OutStreamer.getContext(); MCSymbol *MILabel = OutContext.CreateTempSymbol(); @@ -38,6 +39,16 @@ void StackMaps::recordStackMap(const MachineInstr &MI, uint32_t ID, LocationVec CallsiteLocs; + if (recordResult) { + std::pair ParseResult = + OpParser(MI.operands_begin(), llvm::next(MI.operands_begin(), 1)); + + Location &Loc = ParseResult.first; + assert(Loc.LocType == Location::Register && + "Stackmap return location must be a register."); + CallsiteLocs.push_back(Loc); + } + while (MOI != MOE) { std::pair ParseResult = OpParser(MOI, MOE); diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h new file mode 100644 index 00000000000..e76f9fda2db --- /dev/null +++ b/lib/Target/X86/X86CallingConv.h @@ -0,0 +1,35 @@ +//=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom routines for the X86 Calling Convention that +// aren't done by tablegen. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86CALLINGCONV_H +#define X86CALLINGCONV_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" + +namespace llvm { + +inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + llvm_unreachable("The AnyReg calling convention is only supported by the " \ + "stackmap and patchpoint intrinsics."); + // gracefully fallback to X86 C calling convention on Release builds. + return false; +} + +} // End llvm namespace + +#endif + diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index f5c8d9fda43..a78b5c0a796 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -160,6 +160,17 @@ def RetCC_X86_64_WebKit_JS : CallingConv<[ CCIfType<[i64], CCAssignToReg<[RAX]>> ]>; +// X86-64 AnyReg return-value convention. No explicit register is specified for +// the return-value. The register allocator is allowed and expected to choose +// any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the X86 C calling convention. +def RetCC_X86_64_AnyReg : CallingConv<[ + CCCustom<"CC_X86_AnyReg_Error"> +]>; + // This is the root return-value convention for the X86-32 backend. def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. @@ -178,6 +189,7 @@ def RetCC_X86_64 : CallingConv<[ // Handle JavaScript calls. CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, + CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, // Handle explicit CC selection CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, @@ -350,6 +362,16 @@ def CC_X86_64_WebKit_JS : CallingConv<[ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; +// No explicit register is specified for the AnyReg calling convention. 
The +// register allocator may assign the arguments to any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the X86 C calling convention. +def CC_X86_64_AnyReg : CallingConv<[ + CCCustom<"CC_X86_AnyReg_Error"> +]>; + //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// @@ -542,6 +564,7 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, + CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7984e76edd6..928dea91b4f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86CallingConv.h" #include "X86ISelLowering.h" #include "X86InstrBuilder.h" #include "X86RegisterInfo.h" diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 86ad2621fb4..55bfab449a3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,6 +16,7 @@ #include "X86ISelLowering.h" #include "Utils/X86ShuffleDecode.h" #include "X86.h" +#include "X86CallingConv.h" #include "X86InstrBuilder.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 369b031113e..b81b244828b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4198,14 +4198,20 @@ static MachineInstr* 
foldPatchpoint(MachineFunction &MF, const SmallVectorImpl &Ops, int FrameIndex, const TargetInstrInfo &TII) { + bool hasDef = MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit(); + unsigned StartIdx = hasDef ? 1 : 0; + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); bool isPatchPoint = MI->getOpcode() == TargetOpcode::PATCHPOINT; - unsigned StartIdx = isPatchPoint ? MI->getOperand(3).getImm() + 4 : 2; + StartIdx = isPatchPoint ? + StartIdx + MI->getOperand(StartIdx+3).getImm() + 5 : + StartIdx + 2; - // No need to fold the meta data and function arguments + // No need to fold return, the meta data, and function arguments for (unsigned i = 0; i < StartIdx; ++i) MIB.addOperand(MI->getOperand(i)); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 92a111843f6..fa15114f914 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -780,26 +780,45 @@ static void LowerSTACKMAP(MCStreamer &OutStreamer, static void LowerPATCHPOINT(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, StackMaps &SM, - const MachineInstr &MI) -{ - int64_t ID = MI.getOperand(0).getImm(); + const MachineInstr &MI) { + bool hasDef = MI.getOperand(0).isReg() && MI.getOperand(0).isDef() && + !MI.getOperand(0).isImplicit(); + unsigned StartIdx = hasDef ? 1 : 0; +#ifndef NDEBUG + unsigned StartIdx2 = 0, e = MI.getNumOperands(); + while (StartIdx2 < e && MI.getOperand(StartIdx2).isReg() && + MI.getOperand(StartIdx2).isDef() && + !MI.getOperand(StartIdx2).isImplicit()) + ++StartIdx2; + + assert(StartIdx == StartIdx2 && + "Unexpected additonal definition in Patchpoint intrinsic."); +#endif + + int64_t ID = MI.getOperand(StartIdx).getImm(); assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); // Get the number of arguments participating in the call. 
This number was // adjusted during call lowering by subtracting stack args. - int64_t StackMapIdx = MI.getOperand(3).getImm() + 4; - assert(StackMapIdx <= MI.getNumOperands() && "Patchpoint dropped args."); + bool isAnyRegCC = MI.getOperand(StartIdx + 4).getImm() == CallingConv::AnyReg; + assert(((hasDef && isAnyRegCC) || !hasDef) && + "Only Patchpoints with AnyReg calling convention may have a result"); + int64_t StackMapIdx = isAnyRegCC ? StartIdx + 5 : + StartIdx + 5 + MI.getOperand(StartIdx + 3).getImm(); + assert(StackMapIdx <= MI.getNumOperands() && + "Patchpoint intrinsic dropped arguments."); SM.recordStackMap(MI, ID, llvm::next(MI.operands_begin(), StackMapIdx), - getStackMapEndMOP(MI.operands_begin(), MI.operands_end())); + getStackMapEndMOP(MI.operands_begin(), MI.operands_end()), + isAnyRegCC && hasDef); // Emit call. We need to know how many bytes we encoded here. unsigned EncodedBytes = 2; OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r) - .addReg(MI.getOperand(2).getReg())); + .addReg(MI.getOperand(StartIdx + 2).getReg())); // Emit padding. 
- unsigned NumNOPBytes = MI.getOperand(1).getImm(); + unsigned NumNOPBytes = MI.getOperand(StartIdx + 1).getImm(); assert(NumNOPBytes >= EncodedBytes && "Patchpoint can't request size less than the length of a call."); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 0cb9ac38bce..75987157dc1 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -239,6 +239,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { case CallingConv::HiPE: return CSR_NoRegs_SaveList; + case CallingConv::WebKit_JS: + case CallingConv::AnyReg: + return CSR_MostRegs_64_SaveList; + case CallingConv::Intel_OCL_BI: { bool HasAVX = TM.getSubtarget().hasAVX(); bool HasAVX512 = TM.getSubtarget().hasAVX512(); @@ -296,6 +300,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } if (CC == CallingConv::GHC || CC == CallingConv::HiPE) return CSR_NoRegs_RegMask; + if (CC == CallingConv::WebKit_JS || CC == CallingConv::AnyReg) + return CSR_MostRegs_64_RegMask; if (!Is64Bit) return CSR_32_RegMask; if (CC == CallingConv::Cold) diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll new file mode 100644 index 00000000000..710b6a088fa --- /dev/null +++ b/test/CodeGen/X86/anyregcc.ll @@ -0,0 +1,289 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Stackmap Header: no constants - 6 callsites +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .long 0 +; Num Constants +; CHECK-NEXT: .long 0 +; Num Callsites +; CHECK-NEXT: .long 6 + +; test +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long L{{.*}}-_test +; CHECK-NEXT: .short 0 +; 3 locations +; CHECK-NEXT: .short 3 +; Loc 0: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: 
.long 0 +; Loc 2: Constant 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 +define i64 @test() nounwind ssp uwtable { +entry: + call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 12, i8* null, i32 2, i32 1, i32 2, i64 3) + ret i64 0 +} + +; property access 1 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long L{{.*}}-_property_access1 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access1(i8* %obj) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 12, i8* %f, i32 1, i8* %obj) + ret i64 %ret +} + +; property access 2 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long L{{.*}}-_property_access2 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access2() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 12, i8* %f, i32 1, i64* %obj) + ret i64 %ret +} + +; property access 3 - %obj is a frame index +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long L{{.*}}-_property_access3 +; 
CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register <-- this will be folded once folding for FI is implemented +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access3() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 12, i8* %f, i32 0, i64* %obj) + ret i64 %ret +} + +; anyreg_test1 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long L{{.*}}-_anyreg_test1 +; CHECK-NEXT: .short 0 +; 15 locations +; CHECK-NEXT: .short 15 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; 
CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 14: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 12, i8* %f, i32 14, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) + ret i64 %ret +} + +; anyreg_test2 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long L{{.*}}-_anyreg_test2 +; CHECK-NEXT: .short 0 +; 15 locations +; CHECK-NEXT: .short 15 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; 
CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 14: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 12, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) + ret i64 %ret +} + +declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) +