mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
Reapply: R600: Make sure to inline all internal functions
Function calls aren't supported yet. This was reverted due to build breakages, which should be fixed now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221173 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3f4e9cb609
commit
fbd383c93c
@ -57,6 +57,7 @@ extern char &SILoadStoreOptimizerID;
|
|||||||
FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
|
FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
|
||||||
Pass *createAMDGPUStructurizeCFGPass();
|
Pass *createAMDGPUStructurizeCFGPass();
|
||||||
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
|
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
|
||||||
|
ModulePass *createAMDGPUAlwaysInlinePass();
|
||||||
|
|
||||||
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
|
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
|
||||||
ImmutablePass *
|
ImmutablePass *
|
||||||
|
66
lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
Normal file
66
lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
//===-- AMDGPUAlwaysInlinePass.cpp - Always inline functions --------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
/// \file
|
||||||
|
/// This pass marks all internal functions as always_inline and creates
|
||||||
|
/// duplicates of all other functions and marks the duplicates as always_inline.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/Transforms/Utils/Cloning.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class AMDGPUAlwaysInline : public ModulePass {
|
||||||
|
|
||||||
|
static char ID;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUAlwaysInline() : ModulePass(ID) { }
|
||||||
|
bool runOnModule(Module &M) override;
|
||||||
|
const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End anonymous namespace
|
||||||
|
|
||||||
|
char AMDGPUAlwaysInline::ID = 0;
|
||||||
|
|
||||||
|
/// Makes every used function definition in \p M eligible for forced inlining.
///
/// The work happens in three steps:
///  1. Collect each function that is defined in this module, does not have
///     local linkage and still has at least one use.
///  2. Clone each collected function, give the clone internal linkage, append
///     it to the module and redirect all uses of the original to the clone.
///  3. Add the always_inline attribute to every function with local linkage,
///     which includes the clones created in step 2.
///
/// \returns true if a function was cloned or an attribute was added, so the
/// pass manager knows the module was modified.
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  bool Changed = false;

  // Gather the candidates up front: cloning appends new functions to the
  // module's function list, so no function is added while scanning it.
  std::vector<Function *> FuncsToClone;
  for (Function &F : M) {
    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
      FuncsToClone.push_back(&F);
  }

  for (Function *F : FuncsToClone) {
    ValueToValueMapTy VMap;
    Function *NewFunc = CloneFunction(F, VMap, false);
    NewFunc->setLinkage(GlobalValue::InternalLinkage);
    M.getFunctionList().push_back(NewFunc);
    // The original keeps its linkage and body; only its users are moved over
    // to the internal clone.
    F->replaceAllUsesWith(NewFunc);
    Changed = true;
  }

  for (Function &F : M) {
    if (F.hasLocalLinkage()) {
      F.addFnAttr(Attribute::AlwaysInline);
      Changed = true;
    }
  }

  return Changed;
}
|
||||||
|
|
||||||
|
/// Factory for the AMDGPU always-inline preparation pass.
///
/// \returns a newly allocated AMDGPUAlwaysInline instance.
/// NOTE(review): presumably the pass manager it is added to takes ownership;
/// confirm against the caller.
ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
  ModulePass *InlinePrep = new AMDGPUAlwaysInline();
  return InlinePrep;
}
|
@ -80,6 +80,7 @@ public:
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void addIRPasses() override;
|
||||||
void addCodeGenPrepare() override;
|
void addCodeGenPrepare() override;
|
||||||
bool addPreISel() override;
|
bool addPreISel() override;
|
||||||
bool addInstSelector() override;
|
bool addInstSelector() override;
|
||||||
@ -106,6 +107,19 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
|
|||||||
PM.add(createAMDGPUTargetTransformInfoPass(this));
|
PM.add(createAMDGPUTargetTransformInfoPass(this));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Schedules the target's IR-level passes: forced inlining first, then the
/// generic TargetPassConfig IR passes.
void AMDGPUPassConfig::addIRPasses() {
  // The backend cannot emit function calls, so every call has to be inlined
  // away: first mark/clone the functions, then run the always-inliner.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerPass());

  // The barrier noop pass is required here. Without it, adding the function
  // inlining pass causes all of the PassConfig's passes to be run one
  // function at a time, so for a module with two functions we would generate
  // code for the first function without ever running any passes on the
  // second.
  addPass(createBarrierNoopPass());

  TargetPassConfig::addIRPasses();
}
|
||||||
|
|
||||||
void AMDGPUPassConfig::addCodeGenPrepare() {
|
void AMDGPUPassConfig::addCodeGenPrepare() {
|
||||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||||
if (ST.isPromoteAllocaEnabled()) {
|
if (ST.isPromoteAllocaEnabled()) {
|
||||||
|
@ -13,6 +13,7 @@ add_public_tablegen_target(AMDGPUCommonTableGen)
|
|||||||
|
|
||||||
add_llvm_target(R600CodeGen
|
add_llvm_target(R600CodeGen
|
||||||
AMDILCFGStructurizer.cpp
|
AMDILCFGStructurizer.cpp
|
||||||
|
AMDGPUAlwaysInlinePass.cpp
|
||||||
AMDGPUAsmPrinter.cpp
|
AMDGPUAsmPrinter.cpp
|
||||||
AMDGPUFrameLowering.cpp
|
AMDGPUFrameLowering.cpp
|
||||||
AMDGPUIntrinsicInfo.cpp
|
AMDGPUIntrinsicInfo.cpp
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
|
; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
|
||||||
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
|
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
|
||||||
|
|
||||||
; CHECK: error: unsupported call to function defined_function in test_call
|
; CHECK: error: unsupported call to function external_function in test_call_external
|
||||||
|
|
||||||
|
|
||||||
declare i32 @external_function(i32) nounwind
|
declare i32 @external_function(i32) nounwind
|
||||||
|
24
test/CodeGen/R600/inline-calls.ll
Normal file
24
test/CodeGen/R600/inline-calls.ll
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
|
||||||
|
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK-NOT: {{^}}func:
|
||||||
|
; Internal helper that returns %a + 1. It has local linkage, so the AMDGPU
; always-inline pass tags it always_inline; the directive above expects that
; no standalone label for it appears in the output.
define internal fastcc i32 @func(i32 %a) {
|
||||||
|
entry:
|
||||||
|
%tmp0 = add i32 %a, 1
|
||||||
|
ret i32 %tmp0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: {{^}}kernel:
|
||||||
|
; Non-internal function that calls the internal helper @func with the constant
; 1 and stores the result to %out. Both RUN lines expect llc to succeed, so
; this call must be inlined away.
define void @kernel(i32 addrspace(1)* %out) {
|
||||||
|
entry:
|
||||||
|
%tmp0 = call i32 @func(i32 1)
|
||||||
|
store i32 %tmp0, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: {{^}}kernel2:
|
||||||
|
; Calls @kernel, which is a defined, non-internal function with a use. This
; exercises the cloning path: the pass creates an internal copy of @kernel and
; redirects this call to the copy so it can be inlined.
define void @kernel2(i32 addrspace(1)* %out) {
|
||||||
|
entry:
|
||||||
|
call void @kernel(i32 addrspace(1)* %out)
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user