From 4a544a79bd735967f1d33fe675ae4566dbd17813 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Tue, 6 Sep 2011 13:37:06 +0000 Subject: [PATCH] Split the init.trampoline intrinsic, which currently combines GCC's init.trampoline and adjust.trampoline intrinsics, into two intrinsics like in GCC. While having one combined intrinsic is tempting, it is not natural because typically the trampoline initialization needs to be done in one function, and the result of adjust trampoline is needed in a different (nested) function. To get around this llvm-gcc hacks the nested function lowering code to insert an additional parent variable holding the adjust.trampoline result that can be accessed from the child function. Dragonegg doesn't have the luxury of tweaking GCC code, so it stored the result of adjust.trampoline in the memory GCC set aside for the trampoline itself (this is always available in the child function), and set up some new memory (using an alloca) to hold the trampoline. Unfortunately this breaks Go which allocates trampoline memory on the heap and wants to use it even after the parent has exited (!). Rather than doing even more hacks to get Go working, it seemed best to just use two intrinsics like in GCC. Patch mostly by Sanjoy Das. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139140 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.html | 71 ++++++++++---- include/llvm/CodeGen/ISDOpcodes.h | 21 ++-- include/llvm/Intrinsics.td | 10 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 13 ++- lib/Target/PowerPC/PPCISelLowering.cpp | 22 +++-- lib/Target/PowerPC/PPCISelLowering.h | 3 +- lib/Target/X86/X86ISelLowering.cpp | 23 +++-- lib/Target/X86/X86ISelLowering.h | 3 +- lib/Target/XCore/XCoreISelLowering.cpp | 17 ++-- lib/Target/XCore/XCoreISelLowering.h | 3 +- lib/Transforms/InstCombine/InstCombine.h | 4 +- .../InstCombine/InstCombineCalls.cpp | 97 +++++++++++++++++-- lib/VMCore/AutoUpgrade.cpp | 46 +++++++++ test/Assembler/AutoUpgradeIntrinsics.ll | 13 +++ test/CodeGen/PowerPC/trampoline.ll | 6 +- test/CodeGen/XCore/trampoline.ll | 6 +- .../2008-01-14-VarArgTrampoline.ll | 6 +- 19 files changed, 289 insertions(+), 81 deletions(-) diff --git a/docs/LangRef.html b/docs/LangRef.html index 2b9ee24bf41..0ec08eb0a05 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -275,9 +275,10 @@
  • Debugger intrinsics
  • Exception Handling intrinsics
  • -
  • Trampoline Intrinsic +
  • Trampoline Intrinsics
    1. 'llvm.init.trampoline' Intrinsic
    2. +
    3. 'llvm.adjust.trampoline' Intrinsic
  • Atomic intrinsics @@ -7680,12 +7681,12 @@ LLVM.

    - Trampoline Intrinsic + Trampoline Intrinsics

    -

    This intrinsic makes it possible to excise one parameter, marked with +

    These intrinsics make it possible to excise one parameter, marked with the nest attribute, from a function. The result is a callable function pointer lacking the nest parameter - the caller does not need to @@ -7702,7 +7703,8 @@ LLVM.

       %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
       %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
    -  %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
    +  call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
    +  %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
       %fp = bitcast i8* %p to i32 (i32, i32)*
     
    @@ -7720,12 +7722,12 @@ LLVM.

    Syntax:
    -  declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
    +  declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
     
    Overview:
    -

    This fills the memory pointed to by tramp with code and returns a - function pointer suitable for executing it.

    +

    This fills the memory pointed to by tramp with executable code, + turning it into a trampoline.

    Arguments:

    The llvm.init.trampoline intrinsic takes three arguments, all @@ -7739,17 +7741,50 @@ LLVM.

    Semantics:

    The block of memory pointed to by tramp is filled with target - dependent code, turning it into a function. A pointer to this function is - returned, but needs to be bitcast to an appropriate - function pointer type before being called. The new function's signature - is the same as that of func with any arguments marked with - the nest attribute removed. At most one such nest argument - is allowed, and it must be of pointer type. Calling the new function is - equivalent to calling func with the same argument list, but - with nval used for the missing nest argument. If, after - calling llvm.init.trampoline, the memory pointed to - by tramp is modified, then the effect of any later call to the - returned function pointer is undefined.

    + dependent code, turning it into a function. Then tramp needs to be + passed to llvm.adjust.trampoline to get a pointer + which can be bitcast (to a new function) and + called. The new function's signature is the same as that of + func with any arguments marked with the nest attribute + removed. At most one such nest argument is allowed, and it must be of + pointer type. Calling the new function is equivalent to calling func + with the same argument list, but with nval used for the missing + nest argument. If, after calling llvm.init.trampoline, the + memory pointed to by tramp is modified, then the effect of any later call + to the returned function pointer is undefined.

    +
    + + +

    + + 'llvm.adjust.trampoline' Intrinsic + +

    + +
    + +
    Syntax:
    +
    +  declare i8* @llvm.adjust.trampoline(i8* <tramp>)
    +
    + +
    Overview:
    +

    This performs any required machine-specific adjustment to the address of a + trampoline (passed as tramp).

    + +
    Arguments:
    +

    tramp must point to a block of memory which already has trampoline code + filled in by a previous call to llvm.init.trampoline + .

    + +
    Semantics:
    +

    On some architectures the address of the code to be executed needs to be + different to the address where the trampoline is actually stored. This + intrinsic returns the executable address corresponding to tramp + after performing the required machine specific adjustments. + The pointer returned can then be bitcast and + executed. +

    diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 7f5625cd5e8..14b1378e726 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -566,14 +566,19 @@ namespace ISD { // HANDLENODE node - Used as a handle for various purposes. HANDLENODE, - // TRAMPOLINE - This corresponds to the init_trampoline intrinsic. - // It takes as input a token chain, the pointer to the trampoline, - // the pointer to the nested function, the pointer to pass for the - // 'nest' parameter, a SRCVALUE for the trampoline and another for - // the nested function (allowing targets to access the original - // Function*). It produces the result of the intrinsic and a token - // chain as output. - TRAMPOLINE, + // INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It + // takes as input a token chain, the pointer to the trampoline, the pointer + // to the nested function, the pointer to pass for the 'nest' parameter, a + // SRCVALUE for the trampoline and another for the nested function (allowing + // targets to access the original Function*). It produces a token chain as + // output. + INIT_TRAMPOLINE, + + // ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic. + // It takes a pointer to the trampoline and produces a (possibly) new + // pointer to the same trampoline with platform-specific adjustments + // applied. The pointer it returns points to an executable block of code. + ADJUST_TRAMPOLINE, // TRAP - Trapping instruction TRAP, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 947cf1be7d4..7646eaaf67f 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -344,10 +344,14 @@ def int_annotation : Intrinsic<[llvm_anyint_ty], //===------------------------ Trampoline Intrinsics -----------------------===// // -def int_init_trampoline : Intrinsic<[llvm_ptr_ty], +def int_init_trampoline : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrReadWriteArgMem]>, - GCCBuiltin<"__builtin_init_trampoline">; + [IntrReadWriteArgMem, NoCapture<0>]>, + GCCBuiltin<"__builtin_init_trampoline">; + +def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrReadArgMem]>, + GCCBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b9d841e9d8d..e672512256f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -872,7 +872,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; - case ISD::TRAMPOLINE: + case ISD::INIT_TRAMPOLINE: + case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: // These operations lie about being legal: when they claim to be legal, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1c7b93af3b1..ec7bfbe495c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6054,7 +6054,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTLZ: return "ctlz"; // Trampolines - case ISD::TRAMPOLINE: return "trampoline"; + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; case ISD::CONDCODE: switch (cast(this)->get()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0b0b98d834e..d8fa0c93dbd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5016,12 +5016,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::TRAMPOLINE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), - Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); - setValue(&I, Res); - DAG.setRoot(Res.getValue(1)); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, + TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::gcroot: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index be94b08e75a..39e6c2412f6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -1373,8 +1374,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1403,16 +1409,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair CallResult = - LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()), + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); - SDValue Ops[] = - { CallResult.first, CallResult.second }; - - return DAG.getMergeValues(Ops, 2, dl); + return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, @@ -4499,7 +4502,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, PPCSubTarget); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a4f8e2a839e..602f70abfc4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -390,7 +390,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8005408412b..fa5f720ef0e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -504,7 +504,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -9406,8 +9407,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } -SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -9471,9 +9477,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 22), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6); } else { const Function *Func = cast(cast(Op.getOperand(5))->getValue()); @@ -9553,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 6), false, false, 1); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4); } } @@ -10356,7 +10358,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index db590975776..e83fea95cba 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -821,7 +821,8 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index d23cfe0b92d..3926a7f87f4 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -147,7 +147,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // TRAMPOLINE is custom lowered. - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; maxStoresPerMemmove = maxStoresPerMemmoveOptSize @@ -180,7 +181,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -789,7 +791,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, } SDValue XCoreTargetLowering:: -LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { +LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue XCoreTargetLowering:: +LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -841,9 +848,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 246da9eee55..d6c5b329a0a 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -145,7 +145,8 @@ namespace llvm { SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index c6bdb089982..be4454b8789 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -214,7 +215,8 @@ private: Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4c9cbdb4e63..7da3343fb4e 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -821,6 +820,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { return Simplifier.NewInstruction; } +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -880,10 +956,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast(Callee)) - if (IntrinsicInst *In = dyn_cast(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); PointerType *PTy = cast(Callee->getType()); FunctionType *FTy = cast(PTy->getElementType()); @@ -1164,10 +1238,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. // -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); PointerType *PTy = cast(Callee->getType()); FunctionType *FTy = cast(PTy->getElementType()); @@ -1178,8 +1255,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast(cast(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast(Tramp->getArgOperand(1)->stripPointerCasts()); PointerType *NestFPTy = cast(NestF->getType()); diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 572018105aa..04221d461d7 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -43,6 +43,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { switch (Name[0]) { default: break; + case 'i': + // This upgrades the old llvm.init.trampoline to the new + // llvm.init.trampoline and llvm.adjust.trampoline pair. + if (Name == "init.trampoline") { + // The new llvm.init.trampoline returns nothing. + if (FTy->getReturnType()->isVoidTy()) + break; + + assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!"); + + // Change the name of the old intrinsic so that we can play with its type. + std::string NameTmp = F->getName(); + F->setName(""); + NewFn = cast(M->getOrInsertFunction( + NameTmp, + Type::getVoidTy(M->getContext()), + FTy->getParamType(0), FTy->getParamType(1), + FTy->getParamType(2), (Type *)0)); + return true; + } case 'p': // This upgrades the llvm.prefetch intrinsic to accept one more parameter, // which is a instruction / data cache identifier. The old version only @@ -216,6 +236,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); break; } + case Intrinsic::init_trampoline: { + + // Transform + // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z) + // to + // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z) + // %tramp = call i8* llvm.adjust.trampoline (i8* %x) + + Function *AdjustTrampolineFn = + cast(Intrinsic::getDeclaration(F->getParent(), + Intrinsic::adjust_trampoline)); + + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI); + + Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2)); + + CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn, + CI->getArgOperand(0), + CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(AdjustCall); + CI->eraseFromParent(); + break; + } } } diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index eb4ac769be0..daffa3d3af9 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -40,3 +40,16 @@ define void @p(i8* %ptr) { tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1) ret void } + +declare i32 @nest_f(i8* nest, i32) +declare i8* @llvm.init.trampoline(i8*, i8*, i8*) + +define void @test_trampolines() { +; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null) +; CHECK: call i8* @llvm.adjust.trampoline(i8* null) + + call i8* @llvm.init.trampoline(i8* null, + i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), + i8* null) + ret void +} diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll index bc05bb17635..91b201146b6 100644 --- a/test/CodeGen/PowerPC/trampoline.ll +++ b/test/CodeGen/PowerPC/trampoline.ll @@ -67,7 +67,8 @@ entry: store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4 %TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8* ; [#uses=1] %FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8* ; [#uses=1] - %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; [#uses=1] + call void @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; [#uses=1] + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91) store i8* %tramp, i8** %0, align 4 %5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; [#uses=1] %6 = load i8** %0, align 4 ; [#uses=1] @@ -113,7 +114,8 @@ return: ; preds = %entry ret %struct.objc_object* %retval5 } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind define internal void @__helper_1.1632(%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* nest %CHAIN.8, %struct.__block_1* %_self, %struct.CGImage* %cgImage) nounwind { entry: diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll index 4e1aba025b2..6b42134997b 100644 --- a/test/CodeGen/XCore/trampoline.ll +++ b/test/CodeGen/XCore/trampoline.ll @@ -11,7 +11,8 @@ entry: %FRAME.0 = alloca %struct.FRAME.f, align 4 %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0 %FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8* - %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02) + call void @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02) + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.23.sub) %0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1 %1 = bitcast i8* %tramp to i32 ()* store i32 ()* %1, i32 ()** %0, align 4 @@ -32,6 +33,7 @@ entry: ret i32 %1 } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind declare void @h(i32 ()*) diff --git a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll index 9bb94089393..aacea9df5b3 100644 --- a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll +++ b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll @@ -3,7 +3,8 @@ %struct.FRAME.nest = type { i32, i32 (...)* } %struct.__builtin_trampoline = type { [10 x i8] } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind declare i32 @f(%struct.FRAME.nest* nest , ...) @@ -15,7 +16,8 @@ entry: %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; [#uses=1] store i32 %n, i32* %tmp3, align 8 %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; [#uses=1] - %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; [#uses=1] + call void @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; [#uses=1] + %tramp = call i8* @llvm.adjust.trampoline( i8* %TRAMP.216.sub) %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; [#uses=1] %tmp89 = bitcast i8* %tramp to i32 (...)* ; [#uses=2] store i32 (...)* %tmp89, i32 (...)** %tmp7, align 8