Elena Demikhovsky 3251020738 This patch optimizes a shuffle instruction: it generates 2 instructions instead of 4.
Since this specific shuffle is widely used in many workloads, we gain ~10% performance on them.

shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>

Before:
vmovaps (%rdx), %ymm0
vshufps $8, %ymm0, %ymm0, %ymm0
vmovaps (%rcx), %ymm1
vshufps $8, %ymm0, %ymm1, %ymm1
vunpcklps       %ymm0, %ymm1, %ymm0

After:
vmovaps (%rcx), %ymm0
vmovsldup       (%rdx), %ymm1
vblendps        $85, %ymm0, %ymm1, %ymm0


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163134 91177308-0d34-0410-b5e6-96231b3b80d8
2012-09-04 12:49:02 +00:00
..
2012-07-16 19:35:43 +00:00
2012-05-19 23:34:59 +00:00
2012-01-27 02:31:29 +00:00
2012-02-02 19:00:49 +00:00
2012-03-20 17:20:46 +00:00
2012-02-02 19:00:49 +00:00
2012-07-23 08:51:15 +00:00
2012-01-05 00:43:34 +00:00
2012-06-19 02:17:35 +00:00
2012-08-17 12:28:26 +00:00
2012-05-24 22:08:29 +00:00
2012-08-17 12:28:26 +00:00
2012-08-17 12:28:26 +00:00
2012-08-31 20:12:31 +00:00
2011-12-08 19:21:39 +00:00
2012-04-20 23:36:09 +00:00
2012-07-17 19:40:05 +00:00
2012-01-19 01:13:47 +00:00
2012-06-01 05:00:54 +00:00
2012-03-30 00:26:54 +00:00
2012-09-01 00:17:06 +00:00