Bill Wendling 69a05a7b92 Generate a VTBL instruction instead of a series of loads and stores when we
can. As Nate pointed out, VTBL isn't super performant, but it *has* to be better
than this:

_shuf:
@ BB#0:       @ %entry
  push        {r4, r7, lr}
  add         r7, sp, #4
  sub         sp, #12
  mov         r4, sp
  bic         r4, r4, #7
  mov         sp, r4
  mov         r2, sp
  vmov        d16, r0, r1
  orr         r0, r2, #6
  orr         r3, r2, #7
  vst1.8      {d16[0]}, [r3]
  vst1.8      {d16[5]}, [r0]
  subs        r4, r7, #4
  orr         r0, r2, #5
  vst1.8      {d16[4]}, [r0]
  orr         r0, r2, #4
  vst1.8      {d16[4]}, [r0]
  orr         r0, r2, #3
  vst1.8      {d16[0]}, [r0]
  orr         r0, r2, #2
  vst1.8      {d16[2]}, [r0]
  orr         r0, r2, #1
  vst1.8      {d16[1]}, [r0]
  vst1.8      {d16[3]}, [r2]
  vldr.64     d16, [sp]
  vmov        r0, r1, d16
  mov         sp, r4
  pop         {r4, r7, pc}

The "illegal" testcase in vext.ll is no longer illegal.
<rdar://problem/9078775>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127630 91177308-0d34-0410-b5e6-96231b3b80d8
2011-03-14 23:02:38 +00:00
..
2011-02-24 19:09:52 +00:00
2011-03-09 01:05:00 +00:00
2011-03-11 21:52:04 +00:00