When scalar replacement returns a vector type, only accept it if the vector type's bitwidth matches the (allocated) size of the alloca.

This severely pessimizes vector scalar replacement when the only vector type being used is something like <3 x float> on x86 or ARM, whose allocated size matches that of a <4 x float>. I hope to fix some of the flawed assumptions about allocated size throughout scalar replacement and re-enable this in most cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133338 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-21 23:17:16 +00:00 · 2011-06-18 06:17:51 +00:00
parent b85e4eba85
commit 3ebb05d9a6
3 changed files with 24 additions and 2 deletions
@@ -293,6 +293,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

+  // FIXME: It should be possible to promote the vector type up to the alloca's
+  // size.
+  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+    ScalarKind = Integer;
+
  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
@@ -10,7 +10,8 @@ target triple = "x86_64-apple-macosx10.7.0"

 ; CHECK: main
 ; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32

 define void @main() uwtable ssp {
 entry:
@@ -27,7 +28,8 @@ entry:

 ; CHECK: test1
 ; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32

 define void @test1() uwtable ssp {
 entry:
@@ -19,4 +19,19 @@ entry:
  ret float %val
 }

+; CHECK: g
+; CHECK-NOT: alloca
+; CHECK: and i128
+
+define void @g() nounwind ssp {
+entry:
+  %a = alloca { <4 x float> }, align 16
+  %p = bitcast { <4 x float> }* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
+  %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
+  %arrayidx = getelementptr inbounds [2 x <2 x float>]* %q, i32 0, i32 0
+  store <2 x float> undef, <2 x float>* %arrayidx, align 8
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind