X86: Make shuffle -> shift conversion more aggressive about undefs.

Shuffles that only move an element into position 0 of the vector are common in the output of the loop vectorizer and often generate suboptimal code when SSSE3 is not available. Lower them to vector shifts if possible. When SSSE3 is available, we still prefer palignr over psrldq because it has higher throughput on Sandy Bridge.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182102 91177308-0d34-0410-b5e6-96231b3b80d8
2025-11-03 14:21:30 +00:00 · 2013-05-17 14:48:34 +00:00
parent c032d1aca0
commit a0de26ce34
3 changed files with 54 additions and 20 deletions
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -46,7 +46,7 @@ entry:
 }

 ; CHECK: palignr $8
-; CHECK: psrldq $8
+; CHECK: palignr $8
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s
 ; There are no MMX operations in @t1

 define void  @t1(i32 %a, x86_mmx* %P) nounwind {
@@ -41,3 +41,27 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
 ; CHECK: t4:
 ; CHECK: psrldq $12
 }
+
+define <16 x i8> @t5(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
+        ret <16 x i8> %s
+
+; CHECK: t5:
+; CHECK: psrldq $1
+}
+
+define <16 x i8> @t6(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+        ret <16 x i8> %s
+
+; CHECK: t6:
+; CHECK: palignr $1
+}
+
+define <16 x i8> @t7(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
+        ret <16 x i8> %s
+
+; CHECK: t7:
+; CHECK: pslldq $13
+}