mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
63292d1bba
My recent patch to add support for ISA 2.07 vector pack/unpack instructions didn't properly check for availability of the vpkudum instruction when recognizing it as a special vector shuffle case. This causes us to leave the vector shuffle in place (rather than converting it to a vector permute) so that it can be recognized later as a vpkudum, but that pattern is invalid for processors prior to POWER8. Thus LLVM crashes with an "unable to select" message. We observed this since one of our buildbots is configured to generate code for a POWER7. This patch fixes the problem by checking for availability of the vpkudum instruction during custom lowering of vector shuffles. I've added a test case variant for the vpkudum pattern when the instruction isn't available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237952 91177308-0d34-0410-b5e6-96231b3b80d8
55 lines
2.0 KiB
LLVM
55 lines
2.0 KiB
LLVM
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck -check-prefix=CHECK-PWR7 %s

; Unary form of the shuffle under test: both halves of the result are built
; from the single vector loaded from %A.
;
; The <2 x i64> input is reinterpreted as <4 x i32>, words 1 and 3 are
; extracted, and the vector <w1, w3, w1, w3> is stored back to %A.
define void @VPKUDUM_unary(<2 x i64>* %A) {
entry:
  %tmp = load <2 x i64>, <2 x i64>* %A
  %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
  ; Select the odd-indexed 32-bit words of the input.
  %tmp3 = extractelement <4 x i32> %tmp2, i32 1
  %tmp4 = extractelement <4 x i32> %tmp2, i32 3
  ; Assemble <w1, w3, w1, w3>: the same word pair fills both halves.
  %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0
  %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3
  ; Store the result over the input, viewed as <2 x i64> again.
  %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64>
  store <2 x i64> %tmp9, <2 x i64>* %A
  ret void
}

; CHECK-LABEL: @VPKUDUM_unary
; CHECK-NOT: vperm
; CHECK-NOT: vmrglw
; CHECK-NOT: vmrghw
; CHECK: vpkudum
; CHECK-PWR7: vmrglw
; CHECK-PWR7: vmrghw
; CHECK-PWR7: vmrglw

; Two-input form of the shuffle under test: the low half of the result comes
; from the vector at %A and the high half from the vector at %B.
;
; Both <2 x i64> inputs are reinterpreted as <4 x i32>; words 1 and 3 of each
; are extracted and the vector <a1, a3, b1, b3> is stored back to %A.
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
entry:
  %tmp = load <2 x i64>, <2 x i64>* %A
  %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
  %tmp3 = load <2 x i64>, <2 x i64>* %B
  %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32>
  ; Select the odd-indexed 32-bit words of the first input ...
  %tmp5 = extractelement <4 x i32> %tmp2, i32 1
  %tmp6 = extractelement <4 x i32> %tmp2, i32 3
  ; ... and of the second input.
  %tmp7 = extractelement <4 x i32> %tmp4, i32 1
  %tmp8 = extractelement <4 x i32> %tmp4, i32 3
  ; Assemble <a1, a3, b1, b3>.
  %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0
  %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1
  %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2
  %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3
  ; Store the result over the first input, viewed as <2 x i64> again.
  %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64>
  store <2 x i64> %tmp13, <2 x i64>* %A
  ret void
}

; CHECK-LABEL: @VPKUDUM
; CHECK-NOT: vperm
; CHECK-NOT: vmrglw
; CHECK-NOT: vmrghw
; CHECK: vpkudum
; CHECK-PWR7: vmrglw
; CHECK-PWR7: vmrghw
; CHECK-PWR7: vmrglw