From f4bdec2ebeb1306a77e9377583c5799199775f88 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:25 +0000 Subject: [PATCH] R600: Fix a rare bug where swizzle optimization returns wrong values git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185942 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 5 ++-- test/CodeGen/R600/swizzle-export.ll | 34 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 4413734b401..ad4fd87b79a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1296,6 +1296,8 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, VectorEntry.getOperand(3) }; bool isUnmovable[4] = { false, false, false, false }; + for (unsigned i = 0; i < 4; i++) + RemapSwizzle[i] = i; for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { @@ -1304,8 +1306,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, if (!isUnmovable[Idx]) { // Swap i and Idx std::swap(NewBldVec[Idx], NewBldVec[i]); - RemapSwizzle[Idx] = i; - RemapSwizzle[i] = Idx; + std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]); } isUnmovable[Idx] = true; } diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index c3fb1151254..b2175afdf0a 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +;EG-CHECK: @main ;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX ;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX @@ -91,10 +92,43 @@ main_body: ret void } +; EG-CHECK: @main2 +; EG-CHECK: T{{[0-9]+}}.ZXY0 + +define void @main2() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %5 = extractelement <4 x float> %4, i32 0 + %6 = call float @llvm.cos.f32(float %5) + %7 = load <4 x float> addrspace(8)* null + %8 = extractelement <4 x float> %7, i32 0 + %9 = load <4 x float> addrspace(8)* null + %10 = extractelement <4 x float> %9, i32 1 + %11 = insertelement <4 x float> undef, float %0, i32 0 + %12 = insertelement <4 x float> %11, float %1, i32 1 + %13 = insertelement <4 x float> %12, float %2, i32 2 + %14 = insertelement <4 x float> %13, float %3, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1) + %15 = insertelement <4 x float> undef, float %6, i32 0 + %16 = insertelement <4 x float> %15, float %8, i32 1 + %17 = insertelement <4 x float> %16, float %10, i32 2 + %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2) + ret void +} + ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 +; Function Attrs: nounwind readonly +declare float @llvm.cos.f32(float) #2 + declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } +attributes #2 = { nounwind readonly }