; 2006-04-06 21:19:37 +00:00
; RUN: llvm-as < %s | opt -instcombine | llc -march=ppc32 -mcpu=g5 | not grep vperm &&
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsldoi | wc -l | grep 2 &&
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmrgh | wc -l | grep 3 &&
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmrgl | wc -l | grep 3
; 2006-04-06 18:26:13 +00:00
; VSLDOI_xy: reinterpret *%A and *%B as <16 x sbyte>, build a new byte vector
; from bytes 5..15 of *%A followed by bytes 0..4 of *%B, and store the result
; back to *%A as <8 x short>.
; NOTE(review): the file's RUN lines expect two "vsldoi" matches in the llc
; output; presumably this two-operand form is one of them -- confirm.
void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
entry:
        ; Load both operands and view them as byte vectors.
        %a.vec = load <8 x short>* %A
        %b.vec = load <8 x short>* %B
        %a.bytes = cast <8 x short> %a.vec to <16 x sbyte>
        %b.bytes = cast <8 x short> %b.vec to <16 x sbyte>

        ; Bytes 5..15 of the first operand.
        %a5 = extractelement <16 x sbyte> %a.bytes, uint 5
        %a6 = extractelement <16 x sbyte> %a.bytes, uint 6
        %a7 = extractelement <16 x sbyte> %a.bytes, uint 7
        %a8 = extractelement <16 x sbyte> %a.bytes, uint 8
        %a9 = extractelement <16 x sbyte> %a.bytes, uint 9
        %a10 = extractelement <16 x sbyte> %a.bytes, uint 10
        %a11 = extractelement <16 x sbyte> %a.bytes, uint 11
        %a12 = extractelement <16 x sbyte> %a.bytes, uint 12
        %a13 = extractelement <16 x sbyte> %a.bytes, uint 13
        %a14 = extractelement <16 x sbyte> %a.bytes, uint 14
        %a15 = extractelement <16 x sbyte> %a.bytes, uint 15

        ; Bytes 0..4 of the second operand.
        %b0 = extractelement <16 x sbyte> %b.bytes, uint 0
        %b1 = extractelement <16 x sbyte> %b.bytes, uint 1
        %b2 = extractelement <16 x sbyte> %b.bytes, uint 2
        %b3 = extractelement <16 x sbyte> %b.bytes, uint 3
        %b4 = extractelement <16 x sbyte> %b.bytes, uint 4

        ; Assemble the result lane by lane, starting from undef.
        %r0 = insertelement <16 x sbyte> undef, sbyte %a5, uint 0
        %r1 = insertelement <16 x sbyte> %r0, sbyte %a6, uint 1
        %r2 = insertelement <16 x sbyte> %r1, sbyte %a7, uint 2
        %r3 = insertelement <16 x sbyte> %r2, sbyte %a8, uint 3
        %r4 = insertelement <16 x sbyte> %r3, sbyte %a9, uint 4
        %r5 = insertelement <16 x sbyte> %r4, sbyte %a10, uint 5
        %r6 = insertelement <16 x sbyte> %r5, sbyte %a11, uint 6
        %r7 = insertelement <16 x sbyte> %r6, sbyte %a12, uint 7
        %r8 = insertelement <16 x sbyte> %r7, sbyte %a13, uint 8
        %r9 = insertelement <16 x sbyte> %r8, sbyte %a14, uint 9
        %r10 = insertelement <16 x sbyte> %r9, sbyte %a15, uint 10
        %r11 = insertelement <16 x sbyte> %r10, sbyte %b0, uint 11
        %r12 = insertelement <16 x sbyte> %r11, sbyte %b1, uint 12
        %r13 = insertelement <16 x sbyte> %r12, sbyte %b2, uint 13
        %r14 = insertelement <16 x sbyte> %r13, sbyte %b3, uint 14
        %r15 = insertelement <16 x sbyte> %r14, sbyte %b4, uint 15

        ; Convert back to the pointer's element type and write the result.
        %result = cast <16 x sbyte> %r15 to <8 x short>
        store <8 x short> %result, <8 x short>* %A
        ret void
}
; VSLDOI_xx: single-operand variant of the byte rearrangement. *%A is loaded
; twice; the result takes bytes 5..15 from the first load followed by bytes
; 0..4 from the second load, and is stored back to *%A as <8 x short>.
; %B is accepted but never read.
; NOTE(review): the file's RUN lines expect two "vsldoi" matches in the llc
; output; presumably this same-operand form is one of them -- confirm.
void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) {
entry:          ; explicit label, matching every other function in this file
        ; Both inputs deliberately come from %A.
        %first.vec = load <8 x short>* %A
        %second.vec = load <8 x short>* %A
        %first.bytes = cast <8 x short> %first.vec to <16 x sbyte>
        %second.bytes = cast <8 x short> %second.vec to <16 x sbyte>

        ; Bytes 5..15 of the first load.
        %x5 = extractelement <16 x sbyte> %first.bytes, uint 5
        %x6 = extractelement <16 x sbyte> %first.bytes, uint 6
        %x7 = extractelement <16 x sbyte> %first.bytes, uint 7
        %x8 = extractelement <16 x sbyte> %first.bytes, uint 8
        %x9 = extractelement <16 x sbyte> %first.bytes, uint 9
        %x10 = extractelement <16 x sbyte> %first.bytes, uint 10
        %x11 = extractelement <16 x sbyte> %first.bytes, uint 11
        %x12 = extractelement <16 x sbyte> %first.bytes, uint 12
        %x13 = extractelement <16 x sbyte> %first.bytes, uint 13
        %x14 = extractelement <16 x sbyte> %first.bytes, uint 14
        %x15 = extractelement <16 x sbyte> %first.bytes, uint 15

        ; Bytes 0..4 of the second load.
        %y0 = extractelement <16 x sbyte> %second.bytes, uint 0
        %y1 = extractelement <16 x sbyte> %second.bytes, uint 1
        %y2 = extractelement <16 x sbyte> %second.bytes, uint 2
        %y3 = extractelement <16 x sbyte> %second.bytes, uint 3
        %y4 = extractelement <16 x sbyte> %second.bytes, uint 4

        ; Assemble the result lane by lane, starting from undef.
        %r0 = insertelement <16 x sbyte> undef, sbyte %x5, uint 0
        %r1 = insertelement <16 x sbyte> %r0, sbyte %x6, uint 1
        %r2 = insertelement <16 x sbyte> %r1, sbyte %x7, uint 2
        %r3 = insertelement <16 x sbyte> %r2, sbyte %x8, uint 3
        %r4 = insertelement <16 x sbyte> %r3, sbyte %x9, uint 4
        %r5 = insertelement <16 x sbyte> %r4, sbyte %x10, uint 5
        %r6 = insertelement <16 x sbyte> %r5, sbyte %x11, uint 6
        %r7 = insertelement <16 x sbyte> %r6, sbyte %x12, uint 7
        %r8 = insertelement <16 x sbyte> %r7, sbyte %x13, uint 8
        %r9 = insertelement <16 x sbyte> %r8, sbyte %x14, uint 9
        %r10 = insertelement <16 x sbyte> %r9, sbyte %x15, uint 10
        %r11 = insertelement <16 x sbyte> %r10, sbyte %y0, uint 11
        %r12 = insertelement <16 x sbyte> %r11, sbyte %y1, uint 12
        %r13 = insertelement <16 x sbyte> %r12, sbyte %y2, uint 13
        %r14 = insertelement <16 x sbyte> %r13, sbyte %y3, uint 14
        %r15 = insertelement <16 x sbyte> %r14, sbyte %y4, uint 15

        ; Convert back to the pointer's element type and write the result.
        %result = cast <16 x sbyte> %r15 to <8 x short>
        store <8 x short> %result, <8 x short>* %A
        ret void
}
; 2006-04-06 19:21:02 +00:00
; VPERM_promote: call the AltiVec vperm intrinsic on *%A and *%B (viewed as
; <4 x int>) with a constant control vector whose sixteen entries are all 14,
; then store the result back to *%A as <8 x short>.
; NOTE(review): the first RUN line pipes through "opt -instcombine" and
; requires that no "vperm" appears in the llc output; presumably the constant
; mask is what allows the intrinsic call to be replaced -- confirm.
void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) {
entry:
        ; Load each operand and reinterpret it as four 32-bit ints.
        %a.vec = load <8 x short>* %A
        %a.words = cast <8 x short> %a.vec to <4 x int>
        %b.vec = load <8 x short>* %B
        %b.words = cast <8 x short> %b.vec to <4 x int>

        ; Every control byte is the constant 14.
        %perm = call <4 x int> %llvm.ppc.altivec.vperm(<4 x int> %a.words, <4 x int> %b.words,
                        <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14,
                                       sbyte 14, sbyte 14, sbyte 14, sbyte 14,
                                       sbyte 14, sbyte 14, sbyte 14, sbyte 14,
                                       sbyte 14, sbyte 14, sbyte 14, sbyte 14 >)

        ; Convert back to the pointer's element type and write the result.
        %result = cast <4 x int> %perm to <8 x short>
        store <8 x short> %result, <8 x short>* %A
        ret void
}

; AltiVec permute intrinsic: two <4 x int> operands plus a <16 x sbyte> control vector.
declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>)
; 2006-04-06 21:19:37 +00:00
; tb_l: interleave bytes 8..15 of *%A with bytes 8..15 of *%B
; (A8, B8, A9, B9, ..., A15, B15) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgl" matches in the llc
; output; presumably this byte-sized merge is one of them -- confirm.
void %tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
entry:
        %lhs = load <16 x sbyte>* %A
        %rhs = load <16 x sbyte>* %B

        ; Pull out elements 8..15, alternating between the two inputs.
        %l8 = extractelement <16 x sbyte> %lhs, uint 8
        %r8 = extractelement <16 x sbyte> %rhs, uint 8
        %l9 = extractelement <16 x sbyte> %lhs, uint 9
        %r9 = extractelement <16 x sbyte> %rhs, uint 9
        %l10 = extractelement <16 x sbyte> %lhs, uint 10
        %r10 = extractelement <16 x sbyte> %rhs, uint 10
        %l11 = extractelement <16 x sbyte> %lhs, uint 11
        %r11 = extractelement <16 x sbyte> %rhs, uint 11
        %l12 = extractelement <16 x sbyte> %lhs, uint 12
        %r12 = extractelement <16 x sbyte> %rhs, uint 12
        %l13 = extractelement <16 x sbyte> %lhs, uint 13
        %r13 = extractelement <16 x sbyte> %rhs, uint 13
        %l14 = extractelement <16 x sbyte> %lhs, uint 14
        %r14 = extractelement <16 x sbyte> %rhs, uint 14
        %l15 = extractelement <16 x sbyte> %lhs, uint 15
        %r15 = extractelement <16 x sbyte> %rhs, uint 15

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <16 x sbyte> undef, sbyte %l8, uint 0
        %m1 = insertelement <16 x sbyte> %m0, sbyte %r8, uint 1
        %m2 = insertelement <16 x sbyte> %m1, sbyte %l9, uint 2
        %m3 = insertelement <16 x sbyte> %m2, sbyte %r9, uint 3
        %m4 = insertelement <16 x sbyte> %m3, sbyte %l10, uint 4
        %m5 = insertelement <16 x sbyte> %m4, sbyte %r10, uint 5
        %m6 = insertelement <16 x sbyte> %m5, sbyte %l11, uint 6
        %m7 = insertelement <16 x sbyte> %m6, sbyte %r11, uint 7
        %m8 = insertelement <16 x sbyte> %m7, sbyte %l12, uint 8
        %m9 = insertelement <16 x sbyte> %m8, sbyte %r12, uint 9
        %m10 = insertelement <16 x sbyte> %m9, sbyte %l13, uint 10
        %m11 = insertelement <16 x sbyte> %m10, sbyte %r13, uint 11
        %m12 = insertelement <16 x sbyte> %m11, sbyte %l14, uint 12
        %m13 = insertelement <16 x sbyte> %m12, sbyte %r14, uint 13
        %m14 = insertelement <16 x sbyte> %m13, sbyte %l15, uint 14
        %m15 = insertelement <16 x sbyte> %m14, sbyte %r15, uint 15

        store <16 x sbyte> %m15, <16 x sbyte>* %A
        ret void
}
; th_l: interleave halfwords 4..7 of *%A with halfwords 4..7 of *%B
; (A4, B4, A5, B5, A6, B6, A7, B7) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgl" matches in the llc
; output; presumably this halfword-sized merge is one of them -- confirm.
void %th_l(<8 x short>* %A, <8 x short>* %B) {
entry:
        %lhs = load <8 x short>* %A
        %rhs = load <8 x short>* %B

        ; Pull out elements 4..7, alternating between the two inputs.
        %l4 = extractelement <8 x short> %lhs, uint 4
        %r4 = extractelement <8 x short> %rhs, uint 4
        %l5 = extractelement <8 x short> %lhs, uint 5
        %r5 = extractelement <8 x short> %rhs, uint 5
        %l6 = extractelement <8 x short> %lhs, uint 6
        %r6 = extractelement <8 x short> %rhs, uint 6
        %l7 = extractelement <8 x short> %lhs, uint 7
        %r7 = extractelement <8 x short> %rhs, uint 7

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <8 x short> undef, short %l4, uint 0
        %m1 = insertelement <8 x short> %m0, short %r4, uint 1
        %m2 = insertelement <8 x short> %m1, short %l5, uint 2
        %m3 = insertelement <8 x short> %m2, short %r5, uint 3
        %m4 = insertelement <8 x short> %m3, short %l6, uint 4
        %m5 = insertelement <8 x short> %m4, short %r6, uint 5
        %m6 = insertelement <8 x short> %m5, short %l7, uint 6
        %m7 = insertelement <8 x short> %m6, short %r7, uint 7

        store <8 x short> %m7, <8 x short>* %A
        ret void
}
; tw_l: interleave words 2..3 of *%A with words 2..3 of *%B
; (A2, B2, A3, B3) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgl" matches in the llc
; output; presumably this word-sized merge is one of them -- confirm.
void %tw_l(<4 x int>* %A, <4 x int>* %B) {
entry:
        %lhs = load <4 x int>* %A
        %rhs = load <4 x int>* %B

        ; Pull out elements 2..3, alternating between the two inputs.
        %l2 = extractelement <4 x int> %lhs, uint 2
        %r2 = extractelement <4 x int> %rhs, uint 2
        %l3 = extractelement <4 x int> %lhs, uint 3
        %r3 = extractelement <4 x int> %rhs, uint 3

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <4 x int> undef, int %l2, uint 0
        %m1 = insertelement <4 x int> %m0, int %r2, uint 1
        %m2 = insertelement <4 x int> %m1, int %l3, uint 2
        %m3 = insertelement <4 x int> %m2, int %r3, uint 3

        store <4 x int> %m3, <4 x int>* %A
        ret void
}
; tb_h: interleave bytes 0..7 of *%A with bytes 0..7 of *%B
; (A0, B0, A1, B1, ..., A7, B7) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgh" matches in the llc
; output; presumably this byte-sized merge is one of them -- confirm.
void %tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
entry:
        %lhs = load <16 x sbyte>* %A
        %rhs = load <16 x sbyte>* %B

        ; Pull out elements 0..7, alternating between the two inputs.
        %l0 = extractelement <16 x sbyte> %lhs, uint 0
        %r0 = extractelement <16 x sbyte> %rhs, uint 0
        %l1 = extractelement <16 x sbyte> %lhs, uint 1
        %r1 = extractelement <16 x sbyte> %rhs, uint 1
        %l2 = extractelement <16 x sbyte> %lhs, uint 2
        %r2 = extractelement <16 x sbyte> %rhs, uint 2
        %l3 = extractelement <16 x sbyte> %lhs, uint 3
        %r3 = extractelement <16 x sbyte> %rhs, uint 3
        %l4 = extractelement <16 x sbyte> %lhs, uint 4
        %r4 = extractelement <16 x sbyte> %rhs, uint 4
        %l5 = extractelement <16 x sbyte> %lhs, uint 5
        %r5 = extractelement <16 x sbyte> %rhs, uint 5
        %l6 = extractelement <16 x sbyte> %lhs, uint 6
        %r6 = extractelement <16 x sbyte> %rhs, uint 6
        %l7 = extractelement <16 x sbyte> %lhs, uint 7
        %r7 = extractelement <16 x sbyte> %rhs, uint 7

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <16 x sbyte> undef, sbyte %l0, uint 0
        %m1 = insertelement <16 x sbyte> %m0, sbyte %r0, uint 1
        %m2 = insertelement <16 x sbyte> %m1, sbyte %l1, uint 2
        %m3 = insertelement <16 x sbyte> %m2, sbyte %r1, uint 3
        %m4 = insertelement <16 x sbyte> %m3, sbyte %l2, uint 4
        %m5 = insertelement <16 x sbyte> %m4, sbyte %r2, uint 5
        %m6 = insertelement <16 x sbyte> %m5, sbyte %l3, uint 6
        %m7 = insertelement <16 x sbyte> %m6, sbyte %r3, uint 7
        %m8 = insertelement <16 x sbyte> %m7, sbyte %l4, uint 8
        %m9 = insertelement <16 x sbyte> %m8, sbyte %r4, uint 9
        %m10 = insertelement <16 x sbyte> %m9, sbyte %l5, uint 10
        %m11 = insertelement <16 x sbyte> %m10, sbyte %r5, uint 11
        %m12 = insertelement <16 x sbyte> %m11, sbyte %l6, uint 12
        %m13 = insertelement <16 x sbyte> %m12, sbyte %r6, uint 13
        %m14 = insertelement <16 x sbyte> %m13, sbyte %l7, uint 14
        %m15 = insertelement <16 x sbyte> %m14, sbyte %r7, uint 15

        store <16 x sbyte> %m15, <16 x sbyte>* %A
        ret void
}
; th_h: interleave halfwords 0..3 of *%A with halfwords 0..3 of *%B
; (A0, B0, A1, B1, A2, B2, A3, B3) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgh" matches in the llc
; output; presumably this halfword-sized merge is one of them -- confirm.
void %th_h(<8 x short>* %A, <8 x short>* %B) {
entry:
        %lhs = load <8 x short>* %A
        %rhs = load <8 x short>* %B

        ; Pull out elements 0..3, alternating between the two inputs.
        %l0 = extractelement <8 x short> %lhs, uint 0
        %r0 = extractelement <8 x short> %rhs, uint 0
        %l1 = extractelement <8 x short> %lhs, uint 1
        %r1 = extractelement <8 x short> %rhs, uint 1
        %l2 = extractelement <8 x short> %lhs, uint 2
        %r2 = extractelement <8 x short> %rhs, uint 2
        %l3 = extractelement <8 x short> %lhs, uint 3
        %r3 = extractelement <8 x short> %rhs, uint 3

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <8 x short> undef, short %l0, uint 0
        %m1 = insertelement <8 x short> %m0, short %r0, uint 1
        %m2 = insertelement <8 x short> %m1, short %l1, uint 2
        %m3 = insertelement <8 x short> %m2, short %r1, uint 3
        %m4 = insertelement <8 x short> %m3, short %l2, uint 4
        %m5 = insertelement <8 x short> %m4, short %r2, uint 5
        %m6 = insertelement <8 x short> %m5, short %l3, uint 6
        %m7 = insertelement <8 x short> %m6, short %r3, uint 7

        store <8 x short> %m7, <8 x short>* %A
        ret void
}
; tw_h: interleave words 0..1 of *%A with words 0..1 of *%B
; (A0, B0, A1, B1) and store the result to *%A.
; NOTE(review): the file's RUN lines expect three "vmrgh" matches in the llc
; output; presumably this word-sized merge is one of them -- confirm.
void %tw_h(<4 x int>* %A, <4 x int>* %B) {
entry:
        %lhs = load <4 x int>* %A
        %rhs = load <4 x int>* %B

        ; Pull out elements 0..1, alternating between the two inputs.
        %l0 = extractelement <4 x int> %lhs, uint 0
        %r0 = extractelement <4 x int> %rhs, uint 0
        %l1 = extractelement <4 x int> %lhs, uint 1
        %r1 = extractelement <4 x int> %rhs, uint 1

        ; Rebuild the vector one lane at a time, starting from undef.
        %m0 = insertelement <4 x int> undef, int %l0, uint 0
        %m1 = insertelement <4 x int> %m0, int %r0, uint 1
        %m2 = insertelement <4 x int> %m1, int %l1, uint 2
        %m3 = insertelement <4 x int> %m2, int %r1, uint 3

        store <4 x int> %m3, <4 x int>* %A
        ret void
}