Allow v16i16 and v32i8 shuffles to be rewritten as narrower shuffles.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156156 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-05-04 04:44:49 +00:00
parent 98bda3dfef
commit f3640d7ec1
2 changed files with 16 additions and 5 deletions

View File

@ -5920,10 +5920,12 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned Scale;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
}
SmallVector<int, 8> MaskVec;
@ -6370,7 +6372,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);

View File

@ -202,3 +202,11 @@ define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
%t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i64> %t
}
; CHECK: narrow
; CHECK: vpermilps
; CHECK: ret
define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
%t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
ret <16 x i16> %t
}