[XCore] Move lowering of thread local storage to a separate pass.

Thread local storage is not supported by the XMOS linker so we handle
thread local variables by lowering the variable to an array of n elements
(where n is the number of hardware threads per core, currently 8
for all XMOS devices) indexed by the current thread ID.

Previously this lowering was spread across the XCoreISelLowering and the
XCoreAsmPrinter classes. Moving this to a separate pass should be much
cleaner.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181124 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Osborne 2013-05-04 17:01:55 +00:00
parent effc16bb49
commit 597432fbe5
6 changed files with 158 additions and 55 deletions

View File

@ -15,6 +15,7 @@ add_llvm_target(XCoreCodeGen
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
XCoreLowerThreadLocal.cpp
XCoreMachineFunctionInfo.cpp
XCoreMCInstLower.cpp
XCoreRegisterInfo.cpp

View File

@ -20,12 +20,16 @@
namespace llvm {
// Forward declarations, so this header does not need the full definitions.
class FunctionPass;
class ModulePass;
class TargetMachine;
class XCoreTargetMachine;
class formatted_raw_ostream;
// Initialization hook for the XCoreLowerThreadLocal pass; that pass calls it
// from its constructor with the global pass registry.
void initializeXCoreLowerThreadLocalPass(PassRegistry &p);
// Creates the XCore instruction selector pass; the XCore pass configuration
// adds it from addInstSelector().
FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
CodeGenOpt::Level OptLevel);
// Creates a new instance of the module pass that lowers thread local
// variables; the XCore pass configuration adds it from addPreISel().
ModulePass *createXCoreLowerThreadLocalPass();
} // end namespace llvm;

View File

@ -36,7 +36,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@ -46,12 +45,6 @@
#include <cctype>
using namespace llvm;
// Hidden, optional command-line option "xcore-max-threads": the maximum number
// of threads used when emulating thread-local storage. Defaults to 8.
static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
cl::desc("Maximum number of threads (for emulation thread-local storage)"),
cl::Hidden,
cl::value_desc("number"),
cl::init(8));
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitAlignment(Align > 2 ? Align : 2, GV);
unsigned Size = TD->getTypeAllocSize(C->getType());
if (GV->isThreadLocal()) {
Size *= MaxThreads;
report_fatal_error("TLS is not supported by this target!");
}
unsigned Size = TD->getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.EmitLabel(GVSym);
EmitGlobalConstant(C);
if (GV->isThreadLocal()) {
for (unsigned i = 1; i < MaxThreads; ++i)
EmitGlobalConstant(C);
}
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)

View File

@ -120,9 +120,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
// Thread Local Storage
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
// Conversion of i64 -> double produces constantpool nodes
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@ -172,7 +169,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@ -255,44 +251,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
}
/// Returns true when \p Ty is an array type that has no elements. Any other
/// type (including a null \p Ty, which the cast tolerates) yields false.
static inline bool isZeroLengthArray(Type *Ty) {
  if (ArrayType *ArrayTy = dyn_cast_or_null<ArrayType>(Ty))
    return ArrayTy->getNumElements() == 0;
  return false;
}
/// Lowers the address of a thread local global to
///   wrapped-global-address + getid() * alloc-size-of-the-object,
/// i.e. the slot selected by the value BuildGetId() produces.
///
/// \param Op  the node being lowered; it is cast to GlobalAddressSDNode.
/// \param DAG the selection DAG in which the replacement nodes are created.
/// \returns the ISD::ADD node computing the per-thread address.
SDValue XCoreTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
// FIXME there isn't really debug info here
DebugLoc dl = Op.getDebugLoc();
// transform to label + getid() * size
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
// If GV is an alias then use the aliasee to determine size
// (note: this inner GA is the alias and shadows the SDValue GA above).
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
}
// Neither a variable nor an alias that resolves to one: treated as a bug.
if (!GVar) {
llvm_unreachable("Thread local object not a GlobalVariable?");
}
// The pointee type of GV is the type of the object itself.
Type *Ty = cast<PointerType>(GV->getType())->getElementType();
// Without a known, non-zero-length type the per-thread stride cannot be
// computed; the diagnostic text is only printed in builds without NDEBUG.
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
#ifndef NDEBUG
errs() << "Size of thread local object " << GVar->getName()
<< " is unknown\n";
#endif
llvm_unreachable(0);
}
SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
const DataLayout *TD = TM.getDataLayout();
// Stride between consecutive per-thread copies of the object.
unsigned Size = TD->getTypeAllocSize(Ty);
SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
DAG.getConstant(Size, MVT::i32));
return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
}
SDValue XCoreTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{

View File

@ -0,0 +1,145 @@
//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains a pass that lowers thread local variables on the
/// XCore.
///
//===----------------------------------------------------------------------===//
#include "XCore.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "xcore-lower-thread-local"
using namespace llvm;
/// Hidden, optional command-line option "xcore-max-threads": the number of
/// elements each thread local variable is expanded to when it is lowered to an
/// array. Defaults to 8.
static cl::opt<unsigned> MaxThreads(
"xcore-max-threads", cl::Optional,
cl::desc("Maximum number of threads (for emulation thread-local storage)"),
cl::Hidden, cl::value_desc("number"), cl::init(8));
namespace {
/// Lowers thread local variables on the XCore. Each thread local variable is
/// expanded to an array of n elements indexed by the thread ID where n is the
/// fixed number hardware threads supported by the device.
struct XCoreLowerThreadLocal : public ModulePass {
static char ID;
XCoreLowerThreadLocal() : ModulePass(ID) {
initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry());
}
bool lowerGlobal(GlobalVariable *GV);
bool runOnModule(Module &M);
};
}
// Storage for the pass identifier declared in XCoreLowerThreadLocal.
char XCoreLowerThreadLocal::ID = 0;
// Declares the pass under the command-line name "xcore-lower-thread-local"
// with the description "Lower thread local variables".
INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local",
"Lower thread local variables", false, false)
/// Factory for the thread local lowering pass: returns a freshly allocated
/// XCoreLowerThreadLocal instance as a ModulePass.
ModulePass *llvm::createXCoreLowerThreadLocalPass() {
  XCoreLowerThreadLocal *LoweringPass = new XCoreLowerThreadLocal();
  return LoweringPass;
}
/// Returns the array type used for a lowered thread local variable: MaxThreads
/// elements of \p OriginalType.
static ArrayType *createLoweredType(Type *OriginalType) {
  const unsigned NumThreads = MaxThreads;
  return ArrayType::get(OriginalType, NumThreads);
}
/// Builds the initializer for a lowered thread local variable: a constant
/// array of type \p NewType whose MaxThreads elements are all
/// \p OriginalInitializer.
static Constant *
createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
  const unsigned NumThreads = MaxThreads;
  // One copy of the original initializer per thread slot.
  SmallVector<Constant *, 8> PerThreadCopies(NumThreads, OriginalInitializer);
  return ConstantArray::get(NewType, PerThreadCopies);
}
/// Returns true if at least one entry in the use list of \p GV is not an
/// Instruction; returns false when every use is an Instruction or there are
/// no uses at all.
static bool hasNonInstructionUse(GlobalVariable *GV) {
  Value::use_iterator Cur = GV->use_begin();
  Value::use_iterator End = GV->use_end();
  while (Cur != End) {
    if (!isa<Instruction>(*Cur))
      return true;
    ++Cur;
  }
  return false;
}
/// Returns true when \p Ty is an array type with no elements; any other type
/// yields false.
static bool isZeroLengthArray(Type *Ty) {
  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty))
    return ArrayTy->getNumElements() == 0;
  return false;
}
/// Replaces the thread local global \p GV with an ordinary (non thread local)
/// global holding one copy of the variable per thread, and rewrites every
/// instruction that used \p GV to address the copy selected by the
/// xcore_getid intrinsic.
///
/// \param GV the global to lower; it is erased from its module on success.
/// \returns true if \p GV was replaced, false if it was left untouched
///          (not thread local, used by something other than an instruction,
///          or of a type whose per-thread array cannot be formed).
bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
  Module *M = GV->getParent();
  LLVMContext &Ctx = M->getContext();
  if (!GV->isThreadLocal())
    return false;

  // GV->getType() is the pointer-to-global type, which is neither unsized nor
  // an array, so the "can we lower this?" check has to look at the type of the
  // variable itself.
  Type *ValueTy = GV->getType()->getElementType();

  // Skip globals that we can't lower and leave it for the backend to error.
  if (hasNonInstructionUse(GV) || !ValueTy->isSized() ||
      isZeroLengthArray(ValueTy))
    return false;

  // Create replacement global.
  ArrayType *NewType = createLoweredType(ValueTy);
  // A declaration (e.g. an external thread local) has no initializer to
  // replicate; asking for one is invalid, so the replacement stays a
  // declaration as well.
  Constant *NewInitializer = 0;
  if (GV->hasInitializer())
    NewInitializer = createLoweredInitializer(NewType, GV->getInitializer());
  GlobalVariable *NewGV =
      new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
                         NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
                         GV->getType()->getAddressSpace(),
                         GV->isExternallyInitialized());

  // Update uses. The users are copied first because rewriting an operand
  // modifies the use list of GV.
  SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end());
  for (unsigned I = 0, E = Users.size(); I != E; ++I) {
    User *U = Users[I];
    // Safe: hasNonInstructionUse() rejected every other kind of user above.
    Instruction *Inst = cast<Instruction>(U);
    // New instructions are inserted immediately before the user.
    IRBuilder<> Builder(Inst);
    Function *GetID = Intrinsic::getDeclaration(M, Intrinsic::xcore_getid);
    Value *ThreadID = Builder.CreateCall(GetID);
    // &NewGV[0][ThreadID]: step through the pointer, then pick this thread's
    // element of the array.
    SmallVector<Value *, 2> Indices;
    Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx)));
    Indices.push_back(ThreadID);
    Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
    U->replaceUsesOfWith(GV, Addr);
  }

  // Remove old global.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}
/// Lowers every thread local global variable in \p M.
///
/// \returns true if at least one global was lowered.
bool XCoreLowerThreadLocal::runOnModule(Module &M) {
  // Collect the candidates up front: lowerGlobal() adds a new global to the
  // module and erases the old one, so the global list must not be walked
  // while lowering is in progress.
  SmallVector<GlobalVariable *, 16> Worklist;
  Module::global_iterator It = M.global_begin();
  Module::global_iterator End = M.global_end();
  for (; It != End; ++It) {
    GlobalVariable *Candidate = It;
    if (!Candidate->isThreadLocal())
      continue;
    Worklist.push_back(Candidate);
  }

  bool Changed = false;
  for (unsigned Idx = 0, Count = Worklist.size(); Idx != Count; ++Idx)
    Changed |= lowerGlobal(Worklist[Idx]);
  return Changed;
}

View File

@ -46,6 +46,7 @@ public:
return getTM<XCoreTargetMachine>();
}
virtual bool addPreISel();
virtual bool addInstSelector();
};
} // namespace
@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
}
// Adds the pass that lowers thread local variables to the pass pipeline.
// Always returns false.
bool XCorePassConfig::addPreISel() {
addPass(createXCoreLowerThreadLocalPass());
return false;
}
bool XCorePassConfig::addInstSelector() {
addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;