AVX-512: Optimized the BUILD_VECTOR pattern and fixed the encoding of the VEXTRACTPS instruction.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201134 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2014-02-11 07:25:59 +00:00
parent defd4bc77b
commit e4092e9895
4 changed files with 23 additions and 5 deletions

View File

@ -6070,8 +6070,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
if (VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
@ -6083,7 +6083,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
NumElems/2);
// Recreate the wider vector with the lower and upper part.
return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
if (VT.is256BitVector())
return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.

View File

@ -361,7 +361,7 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX;
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST

View File

@ -15,4 +15,16 @@ define <16 x i32> @test1(i32* %x) {
; test2: adds a constant <16 x i32> vector whose lanes are all -1 to the
; argument %x and returns the sum.
define <16 x i32> @test2(<16 x i32> %x) {
%res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
ret <16 x i32>%res
}
; test3: builds a <16 x float> out of a mostly-zero constant vector with two
; lanes taken from the <4 x float> argument: lane 5 receives element 2 of %a
; and lane 6 receives element 0 of %a. The directives below expect the
; generated code to contain a vinsertf128 followed by a vinsertf64x4, i.e. the
; 512-bit result is expected to be assembled from narrower pieces.
; CHECK-LABEL: test3
; CHECK: vinsertf128
; CHECK: vinsertf64x4
; CHECK: ret
define <16 x float> @test3(<4 x float> %a) {
; Element 2 of %a goes into lane 5 (the only undef lane of the constant).
%b = extractelement <4 x float> %a, i32 2
%c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
; Element 0 of %a overwrites the zero in lane 6.
%b1 = extractelement <4 x float> %a, i32 0
%c1 = insertelement <16 x float> %c, float %b1, i32 6
ret <16 x float>%c1
}

View File

@ -46,4 +46,8 @@ vmovdqu64 %zmm0, %zmm1 {%k3}
// CHECK: vmovd
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0xb4,0x24,0xac,0xff,0xff,0xff]
vmovd %xmm22, -84(%rsp)
vmovd %xmm22, -84(%rsp)
// Memory-destination form of vextractps using an extended register (xmm20).
// NOTE(review): the displacement byte in the expected encoding is 0x1f (31)
// for a source displacement of 124, which looks like a disp8 scaled by the
// 4-byte element size (124 / 4 = 31) -- confirm against the EVEX_CD8 setting
// on the instruction definition.
// CHECK: vextractps
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0x61,0x1f,0x02]
vextractps $2, %xmm20, 124(%rcx)