mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
R600: Fix a rare bug where swizzle optimization returns wrong values
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185942 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
07bb3f1d0a
commit
f4bdec2ebe
@ -1296,6 +1296,8 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
|
||||
VectorEntry.getOperand(3)
|
||||
};
|
||||
bool isUnmovable[4] = { false, false, false, false };
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
RemapSwizzle[i] = i;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
||||
@ -1304,8 +1306,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
|
||||
if (!isUnmovable[Idx]) {
|
||||
// Swap i and Idx
|
||||
std::swap(NewBldVec[Idx], NewBldVec[i]);
|
||||
RemapSwizzle[Idx] = i;
|
||||
RemapSwizzle[i] = Idx;
|
||||
std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
|
||||
}
|
||||
isUnmovable[Idx] = true;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
|
||||
|
||||
;EG-CHECK: @main
|
||||
;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX
|
||||
;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX
|
||||
;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
|
||||
@ -91,10 +92,43 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @main2
|
||||
; EG-CHECK: T{{[0-9]+}}.ZXY0
|
||||
|
||||
; @main2 issues two llvm.R600.store.swizzle calls, each on a <4 x float>
; assembled lane by lane with insertelement:
;   * %14 = <%0, %1, %2, %3>, the four llvm.R600.load.input results
;     (indices 4..7), stored with trailing arguments (60, 1).
;   * %18 = <cos(%5), %8, %10, 0.0>, stored with trailing arguments (0, 2).
; NOTE(review): the EG-CHECK line preceding this function expects a ZXY0
; swizzle; presumably it is the second store (lanes 1 and 2 come from elements
; 0 and 1 of the same loaded vector, lane 3 is the constant 0.0) that produces
; it -- confirm against the backend output.
define void @main2() #0 {
|
||||
main_body:
|
||||
; Four scalar inputs, indices 4 through 7.
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
; Element 0 of the vector at index 1 of the addrspace(8) array, fed through cos.
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
%6 = call float @llvm.cos.f32(float %5)
|
||||
; Elements 0 and 1 of the vector at the addrspace(8) null address; the same
; address is loaded twice (%7 and %9).
%7 = load <4 x float> addrspace(8)* null
|
||||
%8 = extractelement <4 x float> %7, i32 0
|
||||
%9 = load <4 x float> addrspace(8)* null
|
||||
%10 = extractelement <4 x float> %9, i32 1
|
||||
; First vector: the four loaded inputs in order.
%11 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%12 = insertelement <4 x float> %11, float %1, i32 1
|
||||
%13 = insertelement <4 x float> %12, float %2, i32 2
|
||||
%14 = insertelement <4 x float> %13, float %3, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1)
|
||||
; Second vector: cos result, then the two extracted elements, then constant 0.0.
%15 = insertelement <4 x float> undef, float %6, i32 0
|
||||
%16 = insertelement <4 x float> %15, float %8, i32 1
|
||||
%17 = insertelement <4 x float> %16, float %10, i32 2
|
||||
%18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declarations of the intrinsics called by @main2 (other functions in this
; file may call them as well).
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare float @llvm.cos.f32(float) #2
|
||||
|
||||
; Declared without an attribute group.
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
; Attribute groups: #0 is attached to @main2, #1 to llvm.R600.load.input,
; #2 to llvm.cos.f32.
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #2 = { nounwind readonly }
|
||||
|
Loading…
Reference in New Issue
Block a user