llvm-6502/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
Bill Schmidt 63292d1bba [PPC64] Handle vpkudum mask pattern correctly when vpkudum isn't available
My recent patch to add support for ISA 2.07 vector pack/unpack
instructions didn't properly check for availability of the vpkudum
instruction when recognizing it as a special vector shuffle case.
This causes us to leave the vector shuffle in place (rather than
converting it to a vector permute) so that it can be recognized later
as a vpkudum, but that pattern is invalid for processors prior to
POWER8.  Thus LLVM crashes with an "unable to select" message.  We
observed this since one of our buildbots is configured to generate
code for a POWER7.

This patch fixes the problem by checking for availability of the
vpkudum instruction during custom lowering of vector shuffles.

I've added a test case variant for the vpkudum pattern when the
instruction isn't available.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237952 91177308-0d34-0410-b5e6-96231b3b80d8
2015-05-21 20:48:49 +00:00

55 lines
2.0 KiB
LLVM

; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck -check-prefix=CHECK-PWR7 %s
define void @VPKUDUM_unary(<2 x i64>* %A) {
entry:
%tmp = load <2 x i64>, <2 x i64>* %A
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
%tmp4 = extractelement <4 x i32> %tmp2, i32 3
%tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0
%tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1
%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2
%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3
%tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64>
store <2 x i64> %tmp9, <2 x i64>* %A
ret void
}
; CHECK-LABEL: @VPKUDUM_unary
; CHECK-NOT: vperm
; CHECK-NOT: vmrglw
; CHECK-NOT: vmrghw
; CHECK: vpkudum
; CHECK-PWR7: vmrglw
; CHECK-PWR7: vmrghw
; CHECK-PWR7: vmrglw
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
entry:
%tmp = load <2 x i64>, <2 x i64>* %A
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
%tmp3 = load <2 x i64>, <2 x i64>* %B
%tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32>
%tmp5 = extractelement <4 x i32> %tmp2, i32 1
%tmp6 = extractelement <4 x i32> %tmp2, i32 3
%tmp7 = extractelement <4 x i32> %tmp4, i32 1
%tmp8 = extractelement <4 x i32> %tmp4, i32 3
%tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0
%tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1
%tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2
%tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3
%tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64>
store <2 x i64> %tmp13, <2 x i64>* %A
ret void
}
; CHECK-LABEL: @VPKUDUM
; CHECK-NOT: vperm
; CHECK-NOT: vmrglw
; CHECK-NOT: vmrghw
; CHECK: vpkudum
; CHECK-PWR7: vmrglw
; CHECK-PWR7: vmrghw
; CHECK-PWR7: vmrglw