diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 755c9c2b1fe..94c7b50ac8e 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -69,6 +69,10 @@ class ReadNone : IntrinsicProperty { def IntrNoReturn : IntrinsicProperty; +// IntrNoDuplicate - Calls to this intrinsic cannot be duplicated. +// Parallels the noduplicate attribute on LLVM IR functions. +def IntrNoDuplicate : IntrinsicProperty; + //===----------------------------------------------------------------------===// // Types used by intrinsics. //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index a372c22e434..7f72ce8b66f 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -730,15 +730,15 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType; // (space)i64* // Bar.Sync def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">, - Intrinsic<[], [], []>; + Intrinsic<[], [], [IntrNoDuplicate]>; def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">, - Intrinsic<[], [], []>; + Intrinsic<[], [], [IntrNoDuplicate]>; def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>; def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>; def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>; // Membar def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">, diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll new file mode 100644 index 00000000000..64745fcba3b --- /dev/null +++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll @@ -0,0 +1,74 @@ +; RUN: 
opt < %s -O3 -S | FileCheck %s + +; Make sure the call to syncthreads is not duplicated here by the LLVM +; optimizations, because it has the noduplicate attribute set. + +; CHECK: call void @llvm.cuda.syncthreads +; CHECK-NOT: call void @llvm.cuda.syncthreads + +; Function Attrs: nounwind +define void @foo(float* %output) #1 { +entry: + %output.addr = alloca float*, align 8 + store float* %output, float** %output.addr, align 8 + %0 = load float** %output.addr, align 8 + %arrayidx = getelementptr inbounds float* %0, i64 0 + %1 = load float* %arrayidx, align 4 + %conv = fpext float %1 to double + %cmp = fcmp olt double %conv, 1.000000e+01 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %2 = load float** %output.addr, align 8 + %3 = load float* %2, align 4 + %conv1 = fpext float %3 to double + %add = fadd double %conv1, 1.000000e+00 + %conv2 = fptrunc double %add to float + store float %conv2, float* %2, align 4 + br label %if.end + +if.else: ; preds = %entry + %4 = load float** %output.addr, align 8 + %5 = load float* %4, align 4 + %conv3 = fpext float %5 to double + %add4 = fadd double %conv3, 2.000000e+00 + %conv5 = fptrunc double %add4 to float + store float %conv5, float* %4, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.cuda.syncthreads() + %6 = load float** %output.addr, align 8 + %arrayidx6 = getelementptr inbounds float* %6, i64 0 + %7 = load float* %arrayidx6, align 4 + %conv7 = fpext float %7 to double + %cmp8 = fcmp olt double %conv7, 1.000000e+01 + br i1 %cmp8, label %if.then9, label %if.else13 + +if.then9: ; preds = %if.end + %8 = load float** %output.addr, align 8 + %9 = load float* %8, align 4 + %conv10 = fpext float %9 to double + %add11 = fadd double %conv10, 3.000000e+00 + %conv12 = fptrunc double %add11 to float + store float %conv12, float* %8, align 4 + br label %if.end17 + +if.else13: ; preds = %if.end + %10 = load float** %output.addr, align 8 + %11 = load float* %10, align 4 + 
%conv14 = fpext float %11 to double + %add15 = fadd double %conv14, 4.000000e+00 + %conv16 = fptrunc double %add15 to float + store float %conv16, float* %10, align 4 + br label %if.end17 + +if.end17: ; preds = %if.else13, %if.then9 + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @llvm.cuda.syncthreads() #2 + +!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1} +!1 = metadata !{null, metadata !"align", i32 8} diff --git a/test/Feature/intrinsic-noduplicate.ll b/test/Feature/intrinsic-noduplicate.ll new file mode 100644 index 00000000000..9a2b0aba5bd --- /dev/null +++ b/test/Feature/intrinsic-noduplicate.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Make sure LLVM knows about the noduplicate attribute on the +; llvm.cuda.syncthreads intrinsic. + +declare void @llvm.cuda.syncthreads() + +; CHECK: declare void @llvm.cuda.syncthreads() #[[ATTRNUM:[0-9]+]] +; CHECK: attributes #[[ATTRNUM]] = { noduplicate nounwind } diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h index edbb18bbcf5..06daa97b66e 100644 --- a/utils/TableGen/CodeGenIntrinsics.h +++ b/utils/TableGen/CodeGenIntrinsics.h @@ -73,6 +73,9 @@ namespace llvm { /// canThrow - True if the intrinsic can throw. bool canThrow; + /// isNoDuplicate - True if the intrinsic is marked as noduplicate. + bool isNoDuplicate; + /// isNoReturn - True if the intrinsic is no-return. 
bool isNoReturn; diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 1f47675eb5e..884af4c7cb7 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -446,6 +446,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { isCommutative = false; canThrow = false; isNoReturn = false; + isNoDuplicate = false; if (DefName.size() <= 4 || std::string(DefName.begin(), DefName.begin() + 4) != "int_") @@ -570,6 +571,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { isCommutative = true; else if (Property->getName() == "Throws") canThrow = true; + else if (Property->getName() == "IntrNoDuplicate") + isNoDuplicate = true; else if (Property->getName() == "IntrNoReturn") isNoReturn = true; else if (Property->isSubClassOf("NoCapture")) { diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index cf6934cb169..1b281288a4e 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -502,6 +502,9 @@ struct AttributeComparator { if (L->canThrow != R->canThrow) return R->canThrow; + if (L->isNoDuplicate != R->isNoDuplicate) + return R->isNoDuplicate; + if (L->isNoReturn != R->isNoReturn) return R->isNoReturn; @@ -616,7 +619,8 @@ EmitAttributes(const std::vector &Ints, raw_ostream &OS) { ModRefKind modRef = getModRefKind(intrinsic); - if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn) { + if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn || + intrinsic.isNoDuplicate) { OS << " const Attribute::AttrKind Atts[] = {"; bool addComma = false; if (!intrinsic.canThrow) { @@ -629,6 +633,12 @@ EmitAttributes(const std::vector &Ints, raw_ostream &OS) { OS << "Attribute::NoReturn"; addComma = true; } + if (intrinsic.isNoDuplicate) { + if (addComma) + OS << ","; + OS << "Attribute::NoDuplicate"; + addComma = true; + } switch (modRef) { case MRK_none: break;