From 5cddda6d13ab66c462ccbd61255ad6e6f95e9f6f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 15 Nov 2013 18:26:45 +0000 Subject: [PATCH] R600/SI: Add VReg_96 register class to SIRegisterInfo::hasVGPRs() This fixes a crash with GNOME settings manager. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194836 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIRegisterInfo.cpp | 1 + test/CodeGen/R600/sgpr-copy.ll | 46 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 0bbad09cf15..97c216d3d5b 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -90,6 +90,7 @@ bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const { bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) || getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) || + getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) || getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) || getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) || getCommonSubClass(&AMDGPU::VReg_512RegClass, RC); diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index c7741fc874c..67cedd371db 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -222,3 +222,49 @@ declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 + +; This checks for a bug in the FixSGPRCopies pass where VReg96 +; registers were being identified as an SGPR regclass which was causing +; an assertion failure. 
+ +; CHECK-LABEL: @sample_v3 +; CHECK: IMAGE_SAMPLE +; CHECK: IMAGE_SAMPLE +; CHECK: EXP +; CHECK: S_ENDPGM +define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { + +entry: + %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 + %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2 + %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) + %24 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 + %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2 + %26 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 + %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2 + %28 = fcmp oeq float %23, 0.0 + br i1 %28, label %if, label %else + +if: + %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 0, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2) + %val.if.0 = extractelement <4 x float> %val.if, i32 0 + %val.if.1 = extractelement <4 x float> %val.if, i32 1 + %val.if.2 = extractelement <4 x float> %val.if, i32 2 + br label %endif + +else: + %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2) + %val.else.0 = extractelement <4 x float> %val.else, i32 0 + %val.else.1 = extractelement <4 x float> %val.else, i32 1 + %val.else.2 = extractelement <4 x float> %val.else, i32 2 + br label %endif + +endif: + %val.0 = phi float [%val.if.0, %if], [%val.else.0, %else] + %val.1 = phi float [%val.if.1, %if], [%val.else.1, %else] + %val.2 = phi float [%val.if.2, %if], [%val.else.2, %else] + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.0) + ret void +} + +!2 = metadata !{metadata !"const", null, i32 1}