mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-05 12:31:33 +00:00
Revert r239082
llc crashed for NVPTX backend git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239094 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4dbd603825
commit
7c65b1bfb2
@ -20,7 +20,7 @@ set(NVPTXCodeGen_sources
|
|||||||
NVPTXImageOptimizer.cpp
|
NVPTXImageOptimizer.cpp
|
||||||
NVPTXInstrInfo.cpp
|
NVPTXInstrInfo.cpp
|
||||||
NVPTXLowerAggrCopies.cpp
|
NVPTXLowerAggrCopies.cpp
|
||||||
NVPTXLowerKernelArgs.cpp
|
NVPTXLowerStructArgs.cpp
|
||||||
NVPTXMCExpr.cpp
|
NVPTXMCExpr.cpp
|
||||||
NVPTXPrologEpilogPass.cpp
|
NVPTXPrologEpilogPass.cpp
|
||||||
NVPTXRegisterInfo.cpp
|
NVPTXRegisterInfo.cpp
|
||||||
|
@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
|
|||||||
MachineFunctionPass *createNVPTXPrologEpilogPass();
|
MachineFunctionPass *createNVPTXPrologEpilogPass();
|
||||||
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
|
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
|
||||||
FunctionPass *createNVPTXImageOptimizerPass();
|
FunctionPass *createNVPTXImageOptimizerPass();
|
||||||
FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
|
FunctionPass *createNVPTXLowerStructArgsPass();
|
||||||
|
|
||||||
bool isImageOrSamplerVal(const Value *, const Module *);
|
bool isImageOrSamplerVal(const Value *, const Module *);
|
||||||
|
|
||||||
|
@ -1,170 +0,0 @@
|
|||||||
//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Pointer arguments to kernel functions need to be lowered specially.
|
|
||||||
//
|
|
||||||
// 1. Copy byval struct args to local memory. This is a preparation for handling
|
|
||||||
// cases like
|
|
||||||
//
|
|
||||||
// kernel void foo(struct A arg, ...)
|
|
||||||
// {
|
|
||||||
// struct A *p = &arg;
|
|
||||||
// ...
|
|
||||||
// ... = p->field1 ... (there is no generic address for .param)
|
|
||||||
// p->field2 = ... (there is no write access to .param)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
|
|
||||||
// global address space. This allows later optimizations to emit
|
|
||||||
// ld.global.*/st.global.* for accessing these pointer arguments. For
|
|
||||||
// example,
|
|
||||||
//
|
|
||||||
// define void @foo(float* %input) {
|
|
||||||
// %v = load float, float* %input, align 4
|
|
||||||
// ...
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// becomes
|
|
||||||
//
|
|
||||||
// define void @foo(float* %input) {
|
|
||||||
// %input2 = addrspacecast float* %input to float addrspace(1)*
|
|
||||||
// %input3 = addrspacecast float addrspace(1)* %input2 to float*
|
|
||||||
// %v = load float, float* %input3, align 4
|
|
||||||
// ...
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
|
|
||||||
//
|
|
||||||
// define void @foo(float* %input) {
|
|
||||||
// %input2 = addrspacecast float* %input to float addrspace(1)*
|
|
||||||
// %v = load float, float addrspace(1)* %input2, align 4
|
|
||||||
// ...
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
|
|
||||||
// don't cancel the addrspacecast pair this pass emits.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "NVPTX.h"
|
|
||||||
#include "NVPTXUtilities.h"
|
|
||||||
#include "NVPTXTargetMachine.h"
|
|
||||||
#include "llvm/IR/Function.h"
|
|
||||||
#include "llvm/IR/Instructions.h"
|
|
||||||
#include "llvm/IR/Module.h"
|
|
||||||
#include "llvm/IR/Type.h"
|
|
||||||
#include "llvm/Pass.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
class NVPTXLowerKernelArgs : public FunctionPass {
|
|
||||||
bool runOnFunction(Function &F) override;
|
|
||||||
|
|
||||||
// handle byval parameters
|
|
||||||
void handleByValParam(Argument *);
|
|
||||||
// handle non-byval pointer parameters
|
|
||||||
void handlePointerParam(Argument *);
|
|
||||||
|
|
||||||
public:
|
|
||||||
static char ID; // Pass identification, replacement for typeid
|
|
||||||
NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
|
|
||||||
: FunctionPass(ID), TM(TM) {}
|
|
||||||
const char *getPassName() const override {
|
|
||||||
return "Lower pointer arguments of CUDA kernels";
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const NVPTXTargetMachine *TM;
|
|
||||||
};
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
char NVPTXLowerKernelArgs::ID = 1;
|
|
||||||
|
|
||||||
INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
|
|
||||||
"Lower kernel arguments (NVPTX)", false, false)
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d),
|
|
||||||
// then add the following instructions to the first basic block:
|
|
||||||
//
|
|
||||||
// %temp = alloca %struct.x, align 8
|
|
||||||
// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
|
|
||||||
// %tv = load %struct.x addrspace(101)* %tempd
|
|
||||||
// store %struct.x %tv, %struct.x* %temp, align 8
|
|
||||||
//
|
|
||||||
// The above code allocates some space in the stack and copies the incoming
|
|
||||||
// struct from param space to local space.
|
|
||||||
// Then replace all occurrences of %d by %temp.
|
|
||||||
// =============================================================================
|
|
||||||
void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
|
|
||||||
Function *Func = Arg->getParent();
|
|
||||||
Instruction *FirstInst = &(Func->getEntryBlock().front());
|
|
||||||
PointerType *PType = dyn_cast<PointerType>(Arg->getType());
|
|
||||||
|
|
||||||
assert(PType && "Expecting pointer type in handleByValParam");
|
|
||||||
|
|
||||||
Type *StructType = PType->getElementType();
|
|
||||||
AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
|
|
||||||
// Set the alignment to alignment of the byval parameter. This is because,
|
|
||||||
// later load/stores assume that alignment, and we are going to replace
|
|
||||||
// the use of the byval parameter with this alloca instruction.
|
|
||||||
AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
|
|
||||||
Arg->replaceAllUsesWith(AllocA);
|
|
||||||
|
|
||||||
Value *ArgInParam = new AddrSpaceCastInst(
|
|
||||||
Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
|
|
||||||
FirstInst);
|
|
||||||
LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
|
|
||||||
new StoreInst(LI, AllocA, FirstInst);
|
|
||||||
}
|
|
||||||
|
|
||||||
void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
|
|
||||||
assert(!Arg->hasByValAttr() &&
|
|
||||||
"byval params should be handled by handleByValParam");
|
|
||||||
|
|
||||||
Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
|
|
||||||
Instruction *ArgInGlobal = new AddrSpaceCastInst(
|
|
||||||
Arg, PointerType::get(Arg->getType()->getPointerElementType(),
|
|
||||||
ADDRESS_SPACE_GLOBAL),
|
|
||||||
Arg->getName(), FirstInst);
|
|
||||||
Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
|
|
||||||
Arg->getName(), FirstInst);
|
|
||||||
// Replace with ArgInGeneric all uses of Args except ArgInGlobal.
|
|
||||||
Arg->replaceAllUsesWith(ArgInGeneric);
|
|
||||||
ArgInGlobal->setOperand(0, Arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Main function for this pass.
|
|
||||||
// =============================================================================
|
|
||||||
bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
|
|
||||||
// Skip non-kernels. See the comments at the top of this file.
|
|
||||||
if (!isKernelFunction(F))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (Argument &Arg : F.args()) {
|
|
||||||
if (Arg.getType()->isPointerTy()) {
|
|
||||||
if (Arg.hasByValAttr())
|
|
||||||
handleByValParam(&Arg);
|
|
||||||
else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
|
|
||||||
handlePointerParam(&Arg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionPass *
|
|
||||||
llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
|
|
||||||
return new NVPTXLowerKernelArgs(TM);
|
|
||||||
}
|
|
136
lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
Normal file
136
lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Copy struct args to local memory. This is needed for kernel functions only.
|
||||||
|
// This is a preparation for handling cases like
|
||||||
|
//
|
||||||
|
// kernel void foo(struct A arg, ...)
|
||||||
|
// {
|
||||||
|
// struct A *p = &arg;
|
||||||
|
// ...
|
||||||
|
// ... = p->field1 ... (there is no generic address for .param)
|
||||||
|
// p->field2 = ... (there is no write access to .param)
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "NVPTX.h"
|
||||||
|
#include "NVPTXUtilities.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/IR/Instructions.h"
|
||||||
|
#include "llvm/IR/IntrinsicInst.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/IR/Type.h"
|
||||||
|
#include "llvm/Pass.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class NVPTXLowerStructArgs : public FunctionPass {
|
||||||
|
bool runOnFunction(Function &F) override;
|
||||||
|
|
||||||
|
void handleStructPtrArgs(Function &);
|
||||||
|
void handleParam(Argument *);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static char ID; // Pass identification, replacement for typeid
|
||||||
|
NVPTXLowerStructArgs() : FunctionPass(ID) {}
|
||||||
|
const char *getPassName() const override {
|
||||||
|
return "Copy structure (byval *) arguments to stack";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
char NVPTXLowerStructArgs::ID = 1;
|
||||||
|
|
||||||
|
INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
|
||||||
|
"Lower structure arguments (NVPTX)", false, false)
|
||||||
|
|
||||||
|
void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
|
||||||
|
Function *Func = Arg->getParent();
|
||||||
|
Instruction *FirstInst = &(Func->getEntryBlock().front());
|
||||||
|
PointerType *PType = dyn_cast<PointerType>(Arg->getType());
|
||||||
|
|
||||||
|
assert(PType && "Expecting pointer type in handleParam");
|
||||||
|
|
||||||
|
Type *StructType = PType->getElementType();
|
||||||
|
AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
|
||||||
|
|
||||||
|
/* Set the alignment to alignment of the byval parameter. This is because,
|
||||||
|
* later load/stores assume that alignment, and we are going to replace
|
||||||
|
* the use of the byval parameter with this alloca instruction.
|
||||||
|
*/
|
||||||
|
AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
|
||||||
|
|
||||||
|
Arg->replaceAllUsesWith(AllocA);
|
||||||
|
|
||||||
|
// Get the cvt.gen.to.param intrinsic
|
||||||
|
Type *CvtTypes[] = {
|
||||||
|
Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
|
||||||
|
Type::getInt8PtrTy(Func->getParent()->getContext(),
|
||||||
|
ADDRESS_SPACE_GENERIC)};
|
||||||
|
Function *CvtFunc = Intrinsic::getDeclaration(
|
||||||
|
Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
|
||||||
|
|
||||||
|
Value *BitcastArgs[] = {
|
||||||
|
new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
|
||||||
|
ADDRESS_SPACE_GENERIC),
|
||||||
|
Arg->getName(), FirstInst)};
|
||||||
|
CallInst *CallCVT =
|
||||||
|
CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
|
||||||
|
|
||||||
|
BitCastInst *BitCast = new BitCastInst(
|
||||||
|
CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
|
||||||
|
Arg->getName(), FirstInst);
|
||||||
|
LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
|
||||||
|
new StoreInst(LI, AllocA, FirstInst);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
|
||||||
|
// add the following instructions to the first basic block :
|
||||||
|
//
|
||||||
|
// %temp = alloca %struct.x, align 8
|
||||||
|
// %tt1 = bitcast %struct.x * %d to i8 *
|
||||||
|
// %tt2 = llvm.nvvm.ptr.gen.to.param %tt1
|
||||||
|
// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
|
||||||
|
// %tv = load %struct.x addrspace(101) * %tempd
|
||||||
|
// store %struct.x %tv, %struct.x * %temp, align 8
|
||||||
|
//
|
||||||
|
// The above code allocates some space in the stack and copies the incoming
|
||||||
|
// struct from param space to local space.
|
||||||
|
// Then replace all occurrences of %d by %temp.
|
||||||
|
// =============================================================================
|
||||||
|
void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
|
||||||
|
for (Argument &Arg : F.args()) {
|
||||||
|
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
|
||||||
|
handleParam(&Arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Main function for this pass.
|
||||||
|
// =============================================================================
|
||||||
|
bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
|
||||||
|
// Skip non-kernels. See the comments at the top of this file.
|
||||||
|
if (!isKernelFunction(F))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
handleStructPtrArgs(F);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
|
||||||
|
return new NVPTXLowerStructArgs();
|
||||||
|
}
|
@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
|
|||||||
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
|
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
|
||||||
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
|
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
|
||||||
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
|
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
|
||||||
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
|
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void LLVMInitializeNVPTXTarget() {
|
extern "C" void LLVMInitializeNVPTXTarget() {
|
||||||
@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
|
|||||||
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
|
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
|
||||||
initializeNVPTXFavorNonGenericAddrSpacesPass(
|
initializeNVPTXFavorNonGenericAddrSpacesPass(
|
||||||
*PassRegistry::getPassRegistry());
|
*PassRegistry::getPassRegistry());
|
||||||
initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
|
initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string computeDataLayout(bool is64Bit) {
|
static std::string computeDataLayout(bool is64Bit) {
|
||||||
@ -163,7 +163,6 @@ void NVPTXPassConfig::addIRPasses() {
|
|||||||
TargetPassConfig::addIRPasses();
|
TargetPassConfig::addIRPasses();
|
||||||
addPass(createNVPTXAssignValidGlobalNamesPass());
|
addPass(createNVPTXAssignValidGlobalNamesPass());
|
||||||
addPass(createGenericToNVVMPass());
|
addPass(createGenericToNVVMPass());
|
||||||
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
|
|
||||||
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
|
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
|
||||||
// FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
|
// FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
|
||||||
// them unused. We could remove dead code in an ad-hoc manner, but that
|
// them unused. We could remove dead code in an ad-hoc manner, but that
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: opt < %s -nvptx-lower-kernel-args -S | FileCheck %s
|
; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||||
target triple = "nvptx64-unknown-unknown"
|
target triple = "nvptx64-unknown-unknown"
|
||||||
@ -8,8 +8,9 @@ target triple = "nvptx64-unknown-unknown"
|
|||||||
; Function Attrs: nounwind
|
; Function Attrs: nounwind
|
||||||
define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {
|
define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @_Z11TakesStruct1SPi
|
; CHECK-LABEL: @_Z11TakesStruct1SPi
|
||||||
; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)*
|
; CHECK: bitcast %struct.S* %input to i8*
|
||||||
|
; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
|
||||||
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
|
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
|
||||||
%0 = load i32, i32* %b, align 4
|
%0 = load i32, i32* %b, align 4
|
||||||
store i32 %0, i32* %output, align 4
|
store i32 %0, i32* %output, align 4
|
||||||
|
@ -24,10 +24,7 @@ entry:
|
|||||||
; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]]
|
; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]]
|
||||||
|
|
||||||
; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
|
; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
|
||||||
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
|
; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
|
||||||
; FIXME: casting A1_REG to A2_REG is unnecessary; A2_REG is essentially A_REG
|
|
||||||
; CHECK: cvta.global.u64 %rd[[A2_REG:[0-9]+]], %rd[[A1_REG]]
|
|
||||||
; CHECK: ld.global.f32 %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
|
|
||||||
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
|
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
|
||||||
|
|
||||||
%0 = load float, float* %a, align 4
|
%0 = load float, float* %a, align 4
|
||||||
@ -51,7 +48,7 @@ entry:
|
|||||||
|
|
||||||
; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
|
; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
|
||||||
; CHECK: .param .b64 param0;
|
; CHECK: .param .b64 param0;
|
||||||
; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A2_REG]]
|
; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]]
|
||||||
; CHECK-NEXT: .param .b64 param1;
|
; CHECK-NEXT: .param .b64 param1;
|
||||||
; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]]
|
; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]]
|
||||||
; CHECK-NEXT: call.uni
|
; CHECK-NEXT: call.uni
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
|
||||||
target triple = "nvptx64-unknown-unknown"
|
|
||||||
|
|
||||||
; Verify that both %input and %output are converted to global pointers and then
|
|
||||||
; addrspacecast'ed back to the original type.
|
|
||||||
define void @kernel(float* %input, float* %output) {
|
|
||||||
; CHECK-LABEL: .visible .entry kernel(
|
|
||||||
; CHECK: cvta.to.global.u64
|
|
||||||
; CHECK: cvta.to.global.u64
|
|
||||||
%1 = load float, float* %input, align 4
|
|
||||||
; CHECK: ld.global.f32
|
|
||||||
store float %1, float* %output, align 4
|
|
||||||
; CHECK: st.global.f32
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
!nvvm.annotations = !{!0}
|
|
||||||
!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
|
|
@ -3,19 +3,19 @@
|
|||||||
|
|
||||||
define ptx_kernel void @t1(i1* %a) {
|
define ptx_kernel void @t1(i1* %a) {
|
||||||
; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
|
; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
|
||||||
; PTX32-NEXT: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
|
; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
|
||||||
; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
|
; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
|
||||||
; PTX64-NEXT: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
|
; PTX64-NEXT: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
|
||||||
store i1 false, i1* %a
|
store i1 false, i1* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define ptx_kernel void @t2(i1* %a, i8* %b) {
|
define ptx_kernel void @t2(i1* %a, i8* %b) {
|
||||||
; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||||
; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
; PTX64: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
||||||
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
|
|
||||||
|
@ -18,8 +18,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
|
|||||||
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||||
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||||
%ret = sitofp i32 %val to float
|
%ret = sitofp i32 %val to float
|
||||||
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||||
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||||
store float %ret, float* %red
|
store float %ret, float* %red
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
@ -37,8 +37,8 @@ define void @bar(float* %red, i32 %idx) {
|
|||||||
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||||
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||||
%ret = sitofp i32 %val to float
|
%ret = sitofp i32 %val to float
|
||||||
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||||
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||||
store float %ret, float* %red
|
store float %ret, float* %red
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -16,8 +16,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
|
|||||||
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
|
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
|
||||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
|
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
|
||||||
%ret = extractvalue { float, float, float, float } %val, 0
|
%ret = extractvalue { float, float, float, float } %val, 0
|
||||||
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
|
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||||
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
|
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||||
store float %ret, float* %red
|
store float %ret, float* %red
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
@ -34,8 +34,8 @@ define void @bar(float* %red, i32 %idx) {
|
|||||||
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
|
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
|
||||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
|
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
|
||||||
%ret = extractvalue { float, float, float, float } %val, 0
|
%ret = extractvalue { float, float, float, float } %val, 0
|
||||||
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
|
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||||
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
|
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||||
store float %ret, float* %red
|
store float %ret, float* %red
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user