[Stackmap] Materialize the jump address within the patchpoint noop slide.

This patch moves the jump address materialization inside the noop slide. This
enables patching of the materialization itself or its complete removal. This
patch also adds the ability to define scratch registers that can be used safely
by the code called from the patchpoint intrinsic. At least one scratch register
is required, because that one is used for the materialization of the jump
address. This patch depends on D2009.

Differential Revision: http://llvm-reviews.chandlerc.com/D2074

Reviewed by Andy

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194306 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2013-11-09 01:51:33 +00:00
parent 999ffb6085
commit d4f5a61567
9 changed files with 86 additions and 57 deletions

View File

@ -2056,6 +2056,12 @@ public:
return VT.bitsLT(MinVT) ? MinVT : VT;
}
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const {
return NULL;
}
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do

View File

@ -728,10 +728,14 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NumDefs = II.getNumDefs();
const uint16_t *ScratchRegs = NULL;
// Handle PATCHPOINT specially and then use the generic code.
if (Opc == TargetOpcode::PATCHPOINT)
if (Opc == TargetOpcode::PATCHPOINT) {
unsigned CC = Node->getConstantOperandVal(4);
NumDefs = NumResults;
ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
}
unsigned NumImpUses = 0;
unsigned NodeOperands =
@ -767,6 +771,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
// Add scratch registers as implicit def and early clobber
if (ScratchRegs)
for (unsigned i = 0; ScratchRegs[i]; ++i)
MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
RegState::EarlyClobber);
// Transfer all of the memory reference descriptions of this instruction.
MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());

View File

@ -6832,7 +6832,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// [Args...],
// [live variables...])
unsigned CC = CI.getCallingConv();
CallingConv::ID CC = CI.getCallingConv();
bool isAnyRegCC = CC == CallingConv::AnyReg;
bool hasDef = !CI.getType()->isVoidTy();
SDValue Callee = getValue(CI.getOperand(2)); // <target>
@ -6876,7 +6876,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
}
// Assume that the Callee is a constant address.
Ops.push_back(
DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue()));
DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
/*isTarget=*/true));
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
@ -6886,7 +6887,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
// Add the calling convention
Ops.push_back(DAG.getTargetConstant(CC, MVT::i32));
Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.

View File

@ -1773,6 +1773,11 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
static const uint16_t ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,

View File

@ -925,6 +925,8 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
/// expand, the associated machine basic block, and the associated X86

View File

@ -785,6 +785,7 @@ static void LowerPATCHPOINT(MCStreamer &OutStreamer,
!MI.getOperand(0).isImplicit();
unsigned StartIdx = hasDef ? 1 : 0;
#ifndef NDEBUG
{
unsigned StartIdx2 = 0, e = MI.getNumOperands();
while (StartIdx2 < e && MI.getOperand(StartIdx2).isReg() &&
MI.getOperand(StartIdx2).isDef() &&
@ -793,8 +794,20 @@ static void LowerPATCHPOINT(MCStreamer &OutStreamer,
assert(StartIdx == StartIdx2 &&
"Unexpected additonal definition in Patchpoint intrinsic.");
}
#endif
// Find the first scratch register (implicit def and early clobber)
unsigned ScratchIdx = StartIdx, e = MI.getNumOperands();
while (ScratchIdx < e &&
!(MI.getOperand(ScratchIdx).isReg() &&
MI.getOperand(ScratchIdx).isDef() &&
MI.getOperand(ScratchIdx).isImplicit() &&
MI.getOperand(ScratchIdx).isEarlyClobber()))
++ScratchIdx;
assert(ScratchIdx != e && "No scratch register available");
int64_t ID = MI.getOperand(StartIdx).getImm();
assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
@ -812,10 +825,16 @@ static void LowerPATCHPOINT(MCStreamer &OutStreamer,
getStackMapEndMOP(MI.operands_begin(), MI.operands_end()),
isAnyRegCC && hasDef);
// Emit call. We need to know how many bytes we encoded here.
unsigned EncodedBytes = 2;
// Emit MOV to materialize the target address and the CALL to target.
// This is encoded with 12-13 bytes, depending on which register is used.
// We conservatively assume that it is 12 bytes and emit in worst case one
// extra NOP byte.
unsigned EncodedBytes = 12;
OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64ri)
.addReg(MI.getOperand(ScratchIdx).getReg())
.addImm(MI.getOperand(StartIdx + 2).getImm()));
OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r)
.addReg(MI.getOperand(StartIdx + 2).getReg()));
.addReg(MI.getOperand(ScratchIdx).getReg()));
// Emit padding.
unsigned NumNOPBytes = MI.getOperand(StartIdx + 1).getImm();

View File

@ -33,7 +33,7 @@
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 12, i8* null, i32 2, i32 1, i32 2, i64 3)
call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@ -56,7 +56,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 12, i8* %f, i32 1, i8* %obj)
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 15, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@ -80,7 +80,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 12, i8* %f, i32 1, i64* %obj)
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@ -104,7 +104,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 12, i8* %f, i32 0, i64* %obj)
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 15, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@ -112,8 +112,8 @@ entry:
; CHECK-NEXT: .long 4
; CHECK-NEXT: .long L{{.*}}-_anyreg_test1
; CHECK-NEXT: .short 0
; 15 locations
; CHECK-NEXT: .short 15
; 14 locations
; CHECK-NEXT: .short 14
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
@ -184,15 +184,10 @@ entry:
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 14: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable {
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 12, i8* %f, i32 14, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14)
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@ -200,8 +195,8 @@ entry:
; CHECK-NEXT: .long 5
; CHECK-NEXT: .long L{{.*}}-_anyreg_test2
; CHECK-NEXT: .short 0
; 15 locations
; CHECK-NEXT: .short 15
; 14 locations
; CHECK-NEXT: .short 14
; Loc 0: Register <-- this is the return register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
@ -272,15 +267,10 @@ entry:
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
; Loc 14: Register
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .long 0
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14) nounwind ssp uwtable {
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 12, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13, i8* %a14)
%ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}

View File

@ -2,25 +2,21 @@
; Trivial patchpoint codegen
;
; FIXME: We should verify that the call target is materialize after
; the label immediately before the call.
; <rdar://15187295> [JS] llvm.webkit.patchpoint call target should be
; materialized in nop slide.
define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: _trivial_patchpoint_codegen:
; CHECK: Ltmp
; CHECK: callq *%rax
; CHECK-LABEL: trivial_patchpoint_codegen:
; CHECK: movabsq $-559038736, %r11
; CHECK-NEXT: callq *%r11
; CHECK-NEXT: nop
; CHECK: movq %rax, %[[REG:r.+]]
; CHECK: callq *%rax
; CHECK: callq *%r11
; CHECK-NEXT: nop
; CHECK: movq %[[REG]], %rax
; CHECK: ret
%resolveCall2 = inttoptr i64 -559038736 to i8*
%result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 12, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 -559038737 to i8*
tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 3, i32 12, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@ -48,18 +44,18 @@ entry:
define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: jscall_patchpoint_codegen:
; CHECK: Ltmp
; CHECK: movq %r{{.+}}, 8(%rsp)
; CHECK: movq %r{{.+}}, (%rsp)
; CHECK: movq $-559038736, %rax
; CHECK: Ltmp
; CHECK: callq *%rax
; CHECK: movq %rax, 8(%rsp)
; CHECK: callq
; CHECK: Ltmp
; CHECK: movq %r{{.+}}, 8(%rsp)
; CHECK: movq %r{{.+}}, (%rsp)
; CHECK: Ltmp
; CHECK-NEXT: movabsq $-559038736, %r11
; CHECK-NEXT: callq *%r11
; CHECK: movq %rax, 8(%rsp)
; CHECK: callq
%resolveCall2 = inttoptr i64 -559038736 to i8*
%result = tail call webkit_jscc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 12, i8* %resolveCall2, i32 2, i64 %p1, i64 %p2)
%result = tail call webkit_jscc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveCall2, i32 2, i64 %p1, i64 %p2)
%resolveCall3 = inttoptr i64 -559038737 to i8*
tail call webkit_jscc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 12, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
tail call webkit_jscc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret void
}

View File

@ -41,7 +41,7 @@
define void @constantargs() {
entry:
%0 = inttoptr i64 12345 to i8*
tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 1, i32 2, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 1, i32 15, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}
@ -91,7 +91,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 -559038737 to i8*
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 4, i32 12, i8* %thunk, i32 0, i64 %a, i64 %b)
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@ -108,7 +108,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 -559038737 to i8*
%result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 12, i8* %resolveRead, i32 1, i64* %obj)
%result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@ -124,7 +124,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 -559038737 to i8*
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 12, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@ -147,7 +147,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 7, i32 12, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@ -170,7 +170,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
%result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 8, i32 12, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@ -190,13 +190,13 @@ entry:
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 11, i32 12, i8* %resolveCall, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9)
call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 11, i32 15, i8* %resolveCall, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9)
; FIXME: The Spiller needs to be able to fold all rematted loads! This
; can be seen by adding %l15 to the stackmap.
; <rdar:/15202984> [JS] Ran out of registers during register allocation
; %resolveCall = inttoptr i64 -559038737 to i8*
; call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 12, i32 12, i8* %resolveCall, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
; call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 12, i32 15, i8* %resolveCall, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}