Now that SROA can form allocas for dynamic vector accesses, further improve it to replace operations on these vector allocas with insertelement/extractelement instructions.
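
For illustration, here is a minimal sketch of the kind of IR this change now
handles (modelled on the updated test2/test8 CHECK lines; the function and
value names below are made up for the example, not taken from the tests):

  define float @example(i32 %idx1, i32 %idx2) {
  entry:
    %vec = alloca <4 x float>
    store <4 x float> zeroinitializer, <4 x float>* %vec
    %ptr1 = getelementptr <4 x float>* %vec, i32 0, i32 %idx1
    store float 1.0, float* %ptr1
    %ptr2 = getelementptr <4 x float>* %vec, i32 0, i32 %idx2
    %ret = load float* %ptr2
    ret float %ret
  }

SROA can now rewrite the dynamically indexed store and load as insertelement
and extractelement on the promoted vector value, eliminating the alloca:

    %ins = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
    %ret = extractelement <4 x float> %ins, i32 %idx2
    ret float %ret

When the dynamic access is at a non-zero constant offset within the vector,
the scalar index is materialized with an add (the new "dyn.offset" value),
as checked by test8.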

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158623 91177308-0d34-0410-b5e6-96231b3b80d8
Pete Cooper 2012-06-17 03:58:26 +00:00
parent e04690e092
commit 80f020a34a
2 changed files with 161 additions and 61 deletions


@@ -264,23 +264,31 @@ class ConvertToScalarInfo {
/// large integers unless there is some potential for optimization.
bool HadNonMemTransferAccess;
/// HadDynamicAccess - True if some element of this alloca was accessed
/// dynamically (with a non-constant index). We don't yet have support for
/// turning a dynamic access into a large integer.
bool HadDynamicAccess;
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
: AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
VectorTy(0), HadNonMemTransferAccess(false) { }
VectorTy(0), HadNonMemTransferAccess(false), HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
Value *NonConstantIdx);
Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
uint64_t Offset, Value* NonConstantIdx,
IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
uint64_t Offset, Value* NonConstantIdx,
IRBuilder<> &Builder);
};
} // end anonymous namespace.
@@ -291,7 +299,7 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
return 0;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
@@ -319,13 +327,18 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
return 0;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
// know whether to use a left or right shift.
if (ScalarKind == Integer && HadDynamicAccess)
return 0;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
ConvertUsesToScalar(AI, NewAI, 0, 0);
return NewAI;
}
@@ -412,7 +425,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
///
/// If we see at least one access to the value as a vector type, set the
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
Value* NonConstantIdx) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
@@ -442,24 +456,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (!onlyUsedByLifetimeMarkers(BCI))
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
return false;
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// If this is a GEP with variable indices, we can't handle it.
if (!GEP->hasAllConstantIndices())
PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
if (!PtrTy)
return false;
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
if (!GEP->getPointerOperandType()->isPointerTy())
return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Value *GEPNonConstantIdx = 0;
if (!GEP->hasAllConstantIndices()) {
if (!isa<VectorType>(PtrTy->getElementType()))
return false;
if (NonConstantIdx)
return false;
GEPNonConstantIdx = Indices.pop_back_val();
if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
return false;
HadDynamicAccess = true;
} else
GEPNonConstantIdx = NonConstantIdx;
uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
HadNonMemTransferAccess = true;
@@ -469,6 +494,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
// Store of constant value.
if (!isa<ConstantInt>(MSI->getValue()))
return false;
@@ -493,6 +521,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a memcpy or memmove into or out of the whole allocation, we
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
@@ -524,12 +555,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right. By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
uint64_t Offset) {
uint64_t Offset,
Value* NonConstantIdx) {
while (!Ptr->use_empty()) {
Instruction *User = cast<Instruction>(Ptr->use_back());
if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
ConvertUsesToScalar(CI, NewAI, Offset);
ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
CI->eraseFromParent();
continue;
}
@@ -537,9 +569,11 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
if (!GEP->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -550,7 +584,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
NonConstantIdx, Builder);
LI->replaceAllUsesWith(NewLoadVal);
LI->eraseFromParent();
continue;
@@ -560,7 +595,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
Builder);
NonConstantIdx, Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
@@ -575,6 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
unsigned NumBytes = static_cast<unsigned>(SNumBytes);
@@ -591,7 +627,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Old, Offset, 0, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -607,6 +643,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
@@ -679,7 +716,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
uint64_t Offset, Value* NonConstantIdx,
IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
@@ -701,7 +739,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
Value *Idx;
if (NonConstantIdx) {
if (Elt)
Idx = Builder.CreateAdd(NonConstantIdx,
Builder.getInt32(Elt),
"dyn.offset");
else
Idx = NonConstantIdx;
} else
Idx = Builder.getInt32(Elt);
Value *V = Builder.CreateExtractElement(FromVal, Idx);
if (V->getType() != ToType)
V = Builder.CreateBitCast(V, ToType);
return V;
@@ -710,23 +758,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (StructType *ST = dyn_cast<StructType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
Builder);
0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
Offset+i*EltSize, 0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -792,9 +844,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
///
/// NonConstantIdx is the index value of a GEP with a non-constant index, if
/// one was seen. If this is 0 then all GEPs used to find this insert address
/// had constant indices.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t Offset, IRBuilder<> &Builder) {
uint64_t Offset, Value* NonConstantIdx,
IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
Type *AllocaType = Old->getType();
@@ -815,26 +872,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
SV = Builder.CreateBitCast(SV, EltTy);
uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
Value *Idx;
if (NonConstantIdx) {
if (Elt)
Idx = Builder.CreateAdd(NonConstantIdx,
Builder.getInt32(Elt),
"dyn.offset");
else
Idx = NonConstantIdx;
} else
Idx = Builder.getInt32(Elt);
return Builder.CreateInsertElement(Old, SV, Idx);
}
// If SV is a first-class aggregate value, insert each value recursively.
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
0, Builder);
}
return Old;
}
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
}
return Old;
}


@@ -4,12 +4,14 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "x86_64-apple-darwin10.0.0"
; CHECK: @test1
; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
; CHECK: memset
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
; Split the array, but don't replace the memset with an insertelement
; as the offset is not constant.
; The load, however, can be replaced with an extractelement.
define float @test1(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca [4 x <4 x float>]
@@ -23,13 +25,8 @@ entry:
}
; CHECK: @test2
; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1
; CHECK: store float 1.000000e+00, float* %ptr1
; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2
; CHECK: %ret = load float* %ptr2
; CHECK: ret float %ret
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
; Do SROA on the array when it has dynamic vector reads and writes.
define float @test2(i32 %idx1, i32 %idx2) {
@@ -61,13 +58,34 @@ entry:
ret float %ret
}
; CHECK: @test4
; CHECK: test4
; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <16 x float> %0, i32 %idx2
; Don't do SROA on a dynamically indexed vector when it spans
; more than one array element of the alloca array it is within.
; However, unlike test3, the store is on the vector type,
; so SROA will convert the large alloca into the large vector
; type and do all accesses with insertelement/extractelement.
define float @test4(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca [4 x <4 x float>]
%bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
store <16 x float> zeroinitializer, <16 x float>* %bigvec
%ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
store float 1.0, float* %ptr1
%ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
%ret = load float* %ptr2
ret float %ret
}
; CHECK: @test5
; CHECK: %0 = alloca [4 x <4 x float>]
; CHECK-NOT: alloca
; Don't do SROA as there is a second dynamically indexed array
; which may span multiple elements of the alloca.
define float @test4(i32 %idx1, i32 %idx2) {
define float @test5(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca [4 x <4 x float>]
store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
@@ -80,15 +98,9 @@ entry:
ret float %ret
}
; CHECK: test5
; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
; CHECK: store float 1.000000e+00, float* %ptr1
; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
; CHECK: %ret = load float* %ptr2
; CHECK: test6
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
%vector.pair = type { %vector.anon, %vector.anon }
%vector.anon = type { %vector }
@@ -99,7 +111,7 @@ entry:
; the original GEP, just the indices it needs to get to the correct offset of
; some type, not necessarily the dynamic vector.
; This test makes sure we don't have this crash.
define float @test5(i32 %idx1, i32 %idx2) {
define float @test6(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca %vector.pair
store %vector.pair zeroinitializer, %vector.pair* %0
@@ -110,21 +122,15 @@ entry:
ret float %ret
}
; CHECK: test6
; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
; CHECK: store float 1.000000e+00, float* %ptr1
; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
; CHECK: %ret = load float* %ptr2
; CHECK: test7
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
%array.pair = type { [2 x %array.anon], %array.anon }
%array.anon = type { [2 x %vector] }
; This is the same as test5 and tests the same crash, but on arrays.
define float @test6(i32 %idx1, i32 %idx2) {
; This is the same as test6 and tests the same crash, but on arrays.
define float @test7(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca %array.pair
store %array.pair zeroinitializer, %array.pair* %0
@@ -135,4 +141,27 @@ entry:
ret float %ret
}
; CHECK: test8
; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
; Do SROA on the vector when it has dynamic vector reads and writes
; from a non-zero offset.
define float @test8(i32 %idx1, i32 %idx2) {
entry:
%0 = alloca <4 x float>
store <4 x float> zeroinitializer, <4 x float>* %0
%ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
%ptr2 = bitcast float* %ptr1 to <3 x float>*
%ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
store float 1.0, float* %ptr3
%ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
%ptr5 = bitcast float* %ptr4 to <2 x float>*
%ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
%ret = load float* %ptr6
ret float %ret
}
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)