optimize "integer extraction out of the middle of a vector" as produced

by SRoA. This is part of rdar://7892780, but needs another xform to expose this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112232 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-03 00:33:09 +00:00 · 2010-08-26 22:14:59 +00:00 · 2010-08-26 22:14:59 +00:00 · 26dbe7ec18
commit 26dbe7ec18
parent 1ab3f16f06
2 changed files with 60 additions and 13 deletions
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@ -1337,31 +1337,53 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,

 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
 /// bitcast.  The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) {
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
  Value *Src = CI.getOperand(0);
+  const Type *DestTy = CI.getType();

  // If this is a bitcast from int to float, check to see if the int is an
  // extraction from a vector.
  Value *VecInput = 0;
+  // bitcast(trunc(bitcast(somevector)))
  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
      isa<VectorType>(VecInput->getType())) {
    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    const Type *DestTy = CI.getType();
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
    
-    // If the element type of the vector doesn't match the result type, but the
-    // vector type's size is a multiple of the result type, bitcast it to be a
-    // vector type we can extract from.
-    if (VecTy->getElementType() != DestTy &&
-        VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) {
-      VecTy = VectorType::get(DestTy,
-            VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits());
-      VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-    }
-    
-    if (VecTy->getElementType() == DestTy)
      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+    }
  }
  
+  // bitcast(trunc(lshr(bitcast(somevector), cst))
+  ConstantInt *ShAmt = 0;
+  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+                                m_ConstantInt(ShAmt)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+        ShAmt->getZExtValue() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+      
+      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+    }
+  }
  return 0;
 }

--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@ -35,3 +35,28 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 ; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
 ; CHECK-NEXT:  ret float %add
 }
+
+; Optimize bitcasts that are extracting other elements of a vector.  This
+; happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+  
+; CHECK: @test3
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 2
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}