AVX-512: Optimized BUILD_VECTOR pattern;

fixed encoding of VEXTRACTPS instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201134 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 01:31:05 +00:00 · 2014-02-11 07:25:59 +00:00 · 2014-02-11 07:25:59 +00:00 · e4092e9895
commit e4092e9895
parent defd4bc77b
4 changed files with 23 additions and 5 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -6070,8 +6070,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {

  // For AVX-length vectors, build the individual 128-bit pieces and use
  // shuffles to put them in place.
-  if (VT.is256BitVector()) {
-    SmallVector<SDValue, 32> V;
+  if (VT.is256BitVector() || VT.is512BitVector()) {
+    SmallVector<SDValue, 64> V;
    for (unsigned i = 0; i != NumElems; ++i)
      V.push_back(Op.getOperand(i));

@ -6083,7 +6083,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                NumElems/2);

    // Recreate the wider vector with the lower and upper part.
-    return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+    if (VT.is256BitVector())
+      return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+    return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
  }

  // Let legalizer expand 2-wide build_vectors.
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@ -361,7 +361,7 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
-                          addr:$dst)]>, EVEX;
+                          addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;

 //===---------------------------------------------------------------------===//
 // AVX-512 BROADCAST
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@ -15,4 +15,16 @@ define <16 x i32> @test1(i32* %x) {
 define <16 x i32> @test2(<16 x i32> %x) {
   %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
   ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test3
+; CHECK: vinsertf128
+; CHECK: vinsertf64x4
+; CHECK: ret
+define <16 x float> @test3(<4 x float> %a) {
+  %b = extractelement <4 x float> %a, i32 2
+  %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
+  %b1 = extractelement <4 x float> %a, i32 0
+  %c1 = insertelement <16 x float> %c, float %b1, i32 6
+  ret <16 x float>%c1
 }
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@ -46,4 +46,8 @@ vmovdqu64 %zmm0, %zmm1 {%k3}

 // CHECK: vmovd
 // CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0xb4,0x24,0xac,0xff,0xff,0xff]
-vmovd %xmm22, -84(%rsp)
+vmovd %xmm22, -84(%rsp)
+
+// CHECK: vextractps
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0x61,0x1f,0x02]
+vextractps      $2, %xmm20, 124(%rcx)