For the current Atom processor, the fastest way to handle a call indirect through a memory address is to load the memory address into a register and then call indirect through the register. This patch implements this improvement by modifying SelectionDAG to force a function address which is a memory reference to be loaded into a virtual register.

Patch by Sriram Murali.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178171 91177308-0d34-0410-b5e6-96231b3b80d8
commit 1edadea42f
parent e915047fed
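The optimization in one picture: when the target of an indirect call lives in memory, Atom prefers a separate load followed by a register-indirect call. A hypothetical C++ example (not part of the patch; the assembly in the comments is the intended shape, not verbatim compiler output):

// Callee address lives in memory, so the call site is an indirect
// call through a memory operand unless the load is split out.
extern "C" void (*handler)();

void run() {
  // Before this patch (i686):  calll *handler
  // With -mcpu=atom after it:  movl  handler, %eax
  //                            calll *%eax
  handler();
}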
lib/Target/X86/X86.td
@@ -134,6 +134,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+                                     "CallRegIndirect", "true",
+                                     "Call register indirect">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -181,7 +184,9 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide, FeaturePadShortFunctions]>;
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeaturePadShortFunctions]>;
 
 // "Arrandale" along with corei3 and corei5
 def : ProcessorModel<"corei7", SandyBridgeModel,
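The first hunk defines the subtarget feature; the second enables it by default only in the atom processor model. Since a SubtargetFeature's first argument is its user-visible attribute name, the behavior should also be controllable independently of the CPU (a usage sketch, assuming a test input foo.ll):

  llc -mtriple=i686-linux -mattr=+call-reg-indirect foo.ll
  llc -mtriple=i686-linux -mcpu=atom -mattr=-call-reg-indirect foo.ll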
lib/Target/X86/X86ISelLowering.cpp
@@ -2629,6 +2629,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     InFlag = Chain.getValue(1);
   }
 
+  // Use indirect reference through register, when CALL uses a memory reference.
+  if (Subtarget->callRegIndirect() &&
+      Callee.getOpcode() == ISD::LOAD) {
+    const TargetRegisterClass *AddrRegClass =
+      getRegClassFor(Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    unsigned VReg = MRI.createVirtualRegister(AddrRegClass);
+    SDValue tempValue = DAG.getCopyFromReg(Callee,
+                                           dl, VReg, Callee.getValueType());
+    Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
   Ops.push_back(Chain);
   Ops.push_back(Callee);
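How this achieves the rewrite (a simplified reading, not text from the patch): the X86 instruction selector only folds a load into a call's memory operand when the loaded value has a single use. The CopyFromReg/CopyToReg pair glued into the call sequence gives the loaded Callee an extra user, so the fold is rejected, the load is materialized into a register first, and the call is emitted register-indirect; the helper copies through VReg are expected to be coalesced away by later passes.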
lib/Target/X86/X86Subtarget.cpp
@@ -458,6 +458,7 @@ void X86Subtarget::initializeEnvironment() {
   HasSlowDivide = false;
   PostRAScheduler = false;
   PadShortFunctions = false;
+  CallRegIndirect = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
lib/Target/X86/X86Subtarget.h
@@ -159,6 +159,10 @@ protected:
   /// a stall when returning too early.
   bool PadShortFunctions;
 
+  /// CallRegIndirect - True if calls with a memory reference should be
+  /// converted to a register-based indirect call.
+  bool CallRegIndirect;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -269,6 +273,7 @@ public:
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide() const { return HasSlowDivide; }
   bool padShortFunctions() const { return PadShortFunctions; }
+  bool callRegIndirect() const { return CallRegIndirect; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
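For context on the plumbing (standard LLVM subtarget machinery, not text from the patch): the "call-reg-indirect" string from X86.td flows through the TableGen-generated ParseSubtargetFeatures() in X86GenSubtargetInfo.inc, which sets CallRegIndirect during subtarget construction whenever the active CPU or -mattr string includes the feature; LowerCall then consults it through the callRegIndirect() accessor added here.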
test/CodeGen/X86/atom-call-reg-indirect.ll (new file, 45 lines)
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+
+
+; fn_ptr.ll
+%class.A = type { i32 (...)** }
+
+define i32 @test1() #0 {
+;ATOM: test1
+entry:
+  %call = tail call %class.A* @_Z3facv()
+  %0 = bitcast %class.A* %call to void (%class.A*)***
+  %vtable = load void (%class.A*)*** %0, align 8
+  %1 = load void (%class.A*)** %vtable, align 8
+;ATOM32: movl (%ecx), %ecx
+;ATOM32: calll *%ecx
+;ATOM-NOT32: calll *(%ecx)
+;ATOM64: movq (%rcx), %rcx
+;ATOM64: callq *%rcx
+;ATOM-NOT64: callq *(%rcx)
+  tail call void %1(%class.A* %call)
+  ret i32 0
+}
+
+declare %class.A* @_Z3facv() #1
+
+; virt_fn.ll
+@p = external global void (i32)**
+
+define i32 @test2() #0 {
+;ATOM: test2
+entry:
+  %0 = load void (i32)*** @p, align 8
+  %1 = load void (i32)** %0, align 8
+;ATOM32: movl (%eax), %eax
+;ATOM32: calll *%eax
+;ATOM-NOT32: calll *(%eax)
+;ATOM64: movq (%rax), %rax
+;ATOM64: callq *%rax
+;ATOM-NOT64: callq *(%rax)
+  tail call void %1(i32 2)
+  ret i32 0
+}
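To run just this test from an LLVM build tree (paths are illustrative), something like:

  bin/llvm-lit -v test/CodeGen/X86/atom-call-reg-indirect.ll

The core2 RUN lines serve as negative controls: under the ATOM-NOT32/ATOM-NOT64 prefixes, FileCheck verifies that a non-Atom CPU still folds the load and calls directly through memory (calll *(%ecx), callq *(%rcx)).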