diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index abe117d0e5b..78fbd456187 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1128,7 +1128,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isSingleValueType()) { + if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { O << " ."; // Special case: ABI requires that we use .u8 for predicates if (ETy->isIntegerTy(1)) @@ -1310,7 +1310,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isSingleValueType()) { + if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; @@ -1349,17 +1349,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (ATy) return getOpenCLAlignment(TD, ATy->getElementType()); - const VectorType *VTy = dyn_cast(Ty); - if (VTy) { - Type *ETy = VTy->getElementType(); - unsigned int numE = VTy->getNumElements(); - unsigned int alignE = TD->getPrefTypeAlignment(ETy); - if (numE == 3) - return 4 * alignE; - else - return numE * alignE; - } - const StructType *STy = dyn_cast(Ty); if (STy) { unsigned int alignStruct = 1; diff --git a/test/CodeGen/NVPTX/nvcl-param-align.ll b/test/CodeGen/NVPTX/nvcl-param-align.ll new file mode 100644 index 00000000000..c1a489f1fc4 --- /dev/null +++ b/test/CodeGen/NVPTX/nvcl-param-align.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-unknown-nvcl" + +; CHECK-LABEL: .entry foo( +define void @foo(i64 %img, i64 %sampler, <5 x float>* %v) { +; The parameter alignment should be the next power of 2 of 5xsizeof(float), +; which is 32. +; CHECK: .param .u32 .ptr .align 32 foo_param_2 + ret void +} + +!nvvm.annotations = !{!1, !2, !3} +!1 = !{void (i64, i64, <5 x float>*)* @foo, !"kernel", i32 1} +!2 = !{void (i64, i64, <5 x float>*)* @foo, !"rdoimage", i32 0} +!3 = !{void (i64, i64, <5 x float>*)* @foo, !"sampler", i32 1} diff --git a/test/CodeGen/NVPTX/vector-global.ll b/test/CodeGen/NVPTX/vector-global.ll new file mode 100644 index 00000000000..a463bee3a47 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-global.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +@g1 = external global <4 x i32> ; external global variable +; CHECK: .extern .global .align 16 .b8 g1[16]; +@g2 = global <4 x i32> zeroinitializer ; module-level global variable +; CHECK: .visible .global .align 16 .b8 g2[16];