From 87a2b36cf686a3692ab4763be0dbbbcc8df302d0 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 29 Aug 2014 15:30:20 +0000 Subject: [PATCH] [NVPTX] Make the alignment an explicit argument to ldu/ldg Summary: Instead of specifying the alignment as metadata which may be destroyed by transformation passes, make the alignment the second argument to ldu/ldg intrinsic calls. Test Plan: ldu-ldg.ll ldu-i8.ll ldu-reg-plus-offset.ll Reviewers: eliben, meheff, jholewinski Reviewed By: meheff, jholewinski Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D5093 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216731 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsNVVM.td | 18 ++++++++++++------ lib/Target/NVPTX/NVPTXISelLowering.cpp | 22 ++-------------------- test/CodeGen/NVPTX/ldu-i8.ll | 6 ++---- test/CodeGen/NVPTX/ldu-ldg.ll | 20 ++++++++------------ test/CodeGen/NVPTX/ldu-reg-plus-offset.ll | 8 +++----- 5 files changed, 27 insertions(+), 47 deletions(-) diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index cd512841a1a..9deed414b50 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -797,24 +797,30 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64* // Generated within nvvm. 
Use for ldu on sm_20 or later def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldu.global.i">; def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldu.global.f">; def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldu.global.p">; // Generated within nvvm. Use for ldg on sm_35 or later def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldg.global.i">; def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldg.global.f">; def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty], + [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldg.global.p">; // Use for generic pointers diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 55951e64a38..e3d62354285 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3270,16 +3270,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. 
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } @@ -3299,16 +3290,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. - assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll index 9cc66755790..36c99b30425 100644 --- a/test/CodeGen/NVPTX/ldu-i8.ll +++ b/test/CodeGen/NVPTX/ldu-i8.ll @@ -2,15 +2,13 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*) +declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32) define i8 @foo(i8* %a) { ; Ensure we properly truncate off the high-order 24 bits ; CHECK: ldu.global.u8 ; CHECK: cvt.u32.u16 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4) ret i8 %val } - -!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll index 3b0619ff517..4bfd68c2242 
100644 --- a/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/test/CodeGen/NVPTX/ldu-ldg.ll @@ -1,40 +1,36 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr) -declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr) -declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr) -declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr) +declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align) +declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align) +declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align) ; CHECK: func0 define i8 @func0(i8 addrspace(1)* %ptr) { ; ldu.global.u8 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4) ret i8 %val } ; CHECK: func1 define i32 @func1(i32 addrspace(1)* %ptr) { ; ldu.global.u32 - %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4) ret i32 %val } ; CHECK: func2 define i8 @func2(i8 addrspace(1)* %ptr) { ; ld.global.nc.u8 - %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4) ret i8 %val } ; CHECK: func3 define i32 @func3(i32 addrspace(1)* %ptr) { ; ld.global.nc.u32 - %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4) ret i32 %val } - - - -!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll index 55707ea8510..fd35a750390 100644 --- 
a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -7,15 +7,13 @@ define void @reg_plus_offset(i32* %a) { ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; %p2 = getelementptr i32* %a, i32 8 - %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1 + %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4) %p3 = getelementptr i32* %a, i32 9 - %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1 + %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4) %t3 = mul i32 %t1, %t2 store i32 %t3, i32* %a ret void } -!1 = metadata !{ i32 4 } - -declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*) +declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()