Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148337 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 01:31:05 +00:00 · 2012-01-17 21:44:01 +00:00 · 2012-01-17 21:44:01 +00:00 · ba05c91ed2
commit ba05c91ed2
parent 6220fea2a8
4 changed files with 66 additions and 9 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -6982,13 +6982,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
+  EVT VT = InVec.getValueType();
+  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
-    EVT NVT = N->getValueType(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@ -6996,6 +6997,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    return InOp;
  }

+  SDValue EltNo = N->getOperand(1);
+  bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+  // We only perform this optimization before the op legalization phase because
+  // we may introduce new vector instructions that are not backed by TD
+  // patterns. For example, on AVX this can extract elements from a wide vector
+  // without using extract_subvector.
+  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+      && ConstEltNo && !LegalOperations) {
+    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    int NumElem = VT.getVectorNumElements();
+    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+    // Find the new index to extract from.
+    int OrigElt = SVOp->getMaskElt(Elt);
+
+    // Extracting an undef index is undef.
+    if (OrigElt == -1)
+      return DAG.getUNDEF(NVT);
+
+    // Select which shuffle input vector to extract from.
+    if (OrigElt < NumElem) {
+      InVec = InVec->getOperand(0);
+    } else {
+      InVec = InVec->getOperand(1);
+      OrigElt -= NumElem;
+    }
+
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+                       InVec, DAG.getConstant(OrigElt, MVT::i32));
+  }
+
  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();
@ -7003,13 +7036,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
-  SDValue EltNo = N->getOperand(1);

-  if (isa<ConstantSDNode>(EltNo)) {
+  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    bool NewLoad = false;
    bool BCNumEltsChanged = false;
-    EVT VT = InVec.getValueType();
    EVT ExtVT = VT.getVectorElementType();
    EVT LVT = ExtVT;

--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot          %t1.s | count 86
+; RUN: grep rot          %t1.s | count 85
 ; RUN: grep roth         %t1.s | count 8
 ; RUN: grep roti.*5      %t1.s | count 1
 ; RUN: grep roti.*27     %t1.s | count 1
@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
 define <2 x float> @test1(<4 x float> %param )
 {
 ; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
  %el = extractelement <4 x float> %param, i32 1
  %vec1 = insertelement <1 x float> undef, float %el, i32 0
  %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@ -12,11 +12,11 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
 ; rdar://10538417
 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
 ; CHECK: test2:
-; CHECK: vxorpd
-; CHECK: vperm2f128
+; CHECK: vinsertf128
  %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
  ret <3 x i64> %2
+; CHECK: ret
 }

 define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
@ -24,6 +24,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
  ret <4 x i64> %c
 ; CHECK: test3:
 ; CHECK: vperm2f128
+; CHECK: ret
 }

 define <8 x float> @test4(float %a) nounwind {
@ -75,3 +76,23 @@ entry:
 ; CHECK: ret
  ret void
 }
+
+; Extract a value from a shufflevector.
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+  ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = extractelement <8 x i32> %b, i32 2
+  ret i32 %r
+}
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@ -10,6 +10,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }


@ -23,6 +24,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }

 ; Example of when widening a v3float operation causes the DAG to replace a node
@ -31,7 +33,7 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
  %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
  %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@ -45,6 +47,7 @@ entry:
  %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
  ret void
+; CHECK: ret
 }

 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
@ -53,6 +56,7 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK-NOT: punpckldq
  %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %vshuf
+; CHECK: ret
 }

 ; PR11389: another CONCAT_VECTORS case
@ -61,4 +65,5 @@ define void @shuf5(<8 x i8>* %p) nounwind {
  %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %v, <8 x i8>* %p, align 8
  ret void
+; CHECK: ret
 }