mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
Teach the rewriting of memcpy calls to support subvector copies.
This also cleans up a bit of the memcpy call rewriting by sinking some irrelevant code further down and making the call-emitting code a bit more concrete.

Previously, memcpy of a subvector would actually miscompile (!!!) the copy into a single vector element copy. I have no idea how this ever worked. =/ This is the memcpy half of PR14478 which we probably weren't noticing previously because it didn't actually assert.

The rewrite relies on the newly refactored insert- and extractVector functions to do the heavy lifting, and those are the same as used for loads and stores which makes the test coverage a bit more meaningful here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170338 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3d170e64ca
commit
99a54942ae
@ -2900,37 +2900,22 @@ private:
|
||||
// Record this instruction for deletion.
|
||||
Pass.DeadInsts.insert(&II);
|
||||
|
||||
bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
|
||||
EndOffset == NewAllocaEndOffset;
|
||||
bool IsVectorElement = VecTy && !IsWholeAlloca;
|
||||
uint64_t Size = EndOffset - BeginOffset;
|
||||
IntegerType *SubIntTy
|
||||
= IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
|
||||
|
||||
Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
|
||||
: II.getRawDest()->getType();
|
||||
if (!EmitMemCpy) {
|
||||
if (IsVectorElement)
|
||||
OtherPtrTy = VecTy->getElementType()->getPointerTo();
|
||||
else if (IntTy && !IsWholeAlloca)
|
||||
OtherPtrTy = SubIntTy->getPointerTo();
|
||||
else
|
||||
OtherPtrTy = NewAI.getType();
|
||||
}
|
||||
|
||||
// Compute the other pointer, folding as much as possible to produce
|
||||
// a single, simple GEP in most cases.
|
||||
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
|
||||
OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
|
||||
getName("." + OtherPtr->getName()));
|
||||
|
||||
// Strip all inbounds GEPs and pointer casts to try to dig out any root
|
||||
// alloca that should be re-examined after rewriting this instruction.
|
||||
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
|
||||
if (AllocaInst *AI
|
||||
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
|
||||
Pass.Worklist.insert(AI);
|
||||
|
||||
if (EmitMemCpy) {
|
||||
Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
|
||||
: II.getRawDest()->getType();
|
||||
|
||||
// Compute the other pointer, folding as much as possible to produce
|
||||
// a single, simple GEP in most cases.
|
||||
OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
|
||||
getName("." + OtherPtr->getName()));
|
||||
|
||||
Value *OurPtr
|
||||
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
|
||||
: II.getRawSource()->getType());
|
||||
@ -2951,18 +2936,38 @@ private:
|
||||
if (!Align)
|
||||
Align = 1;
|
||||
|
||||
Value *SrcPtr = OtherPtr;
|
||||
bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
|
||||
EndOffset == NewAllocaEndOffset;
|
||||
uint64_t Size = EndOffset - BeginOffset;
|
||||
unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
|
||||
unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
|
||||
unsigned NumElements = EndIndex - BeginIndex;
|
||||
IntegerType *SubIntTy
|
||||
= IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
|
||||
|
||||
Type *OtherPtrTy = NewAI.getType();
|
||||
if (VecTy && !IsWholeAlloca) {
|
||||
if (NumElements == 1)
|
||||
OtherPtrTy = VecTy->getElementType();
|
||||
else
|
||||
OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
|
||||
|
||||
OtherPtrTy = OtherPtrTy->getPointerTo();
|
||||
} else if (IntTy && !IsWholeAlloca) {
|
||||
OtherPtrTy = SubIntTy->getPointerTo();
|
||||
}
|
||||
|
||||
Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
|
||||
getName("." + OtherPtr->getName()));
|
||||
Value *DstPtr = &NewAI;
|
||||
if (!IsDest)
|
||||
std::swap(SrcPtr, DstPtr);
|
||||
|
||||
Value *Src;
|
||||
if (IsVectorElement && !IsDest) {
|
||||
// We have to extract rather than load.
|
||||
Src = IRB.CreateExtractElement(
|
||||
IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
|
||||
IRB.getInt32(getIndex(BeginOffset)),
|
||||
getName(".copyextract"));
|
||||
if (VecTy && !IsWholeAlloca && !IsDest) {
|
||||
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
|
||||
getName(".load"));
|
||||
Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
|
||||
} else if (IntTy && !IsWholeAlloca && !IsDest) {
|
||||
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
|
||||
getName(".load"));
|
||||
@ -2975,7 +2980,11 @@ private:
|
||||
getName(".copyload"));
|
||||
}
|
||||
|
||||
if (IntTy && !IsWholeAlloca && IsDest) {
|
||||
if (VecTy && !IsWholeAlloca && IsDest) {
|
||||
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
|
||||
getName(".oldload"));
|
||||
Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
|
||||
} else if (IntTy && !IsWholeAlloca && IsDest) {
|
||||
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
|
||||
getName(".oldload"));
|
||||
Old = convertValue(TD, IRB, Old, IntTy);
|
||||
@ -2985,14 +2994,6 @@ private:
|
||||
Src = convertValue(TD, IRB, Src, NewAllocaTy);
|
||||
}
|
||||
|
||||
if (IsVectorElement && IsDest) {
|
||||
// We have to insert into a loaded copy before storing.
|
||||
Src = IRB.CreateInsertElement(
|
||||
IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
|
||||
Src, IRB.getInt32(getIndex(BeginOffset)),
|
||||
getName(".insert"));
|
||||
}
|
||||
|
||||
StoreInst *Store = cast<StoreInst>(
|
||||
IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
|
||||
(void)Store;
|
||||
|
@ -314,6 +314,54 @@ entry:
|
||||
; CHECK-NEXT: ret <4 x float> %[[insert4]]
|
||||
}
|
||||
|
||||
define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
|
||||
; CHECK: @test_subvec_memcpy
|
||||
entry:
|
||||
%a = alloca <4 x float>
|
||||
; CHECK-NOT: alloca
|
||||
|
||||
%a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
|
||||
%a.cast0 = bitcast float* %a.gep0 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
|
||||
; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
|
||||
; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
|
||||
; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
|
||||
|
||||
%a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
|
||||
%a.cast1 = bitcast float* %a.gep1 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
|
||||
; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
|
||||
; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
|
||||
; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
|
||||
; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
|
||||
|
||||
%a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
|
||||
%a.cast2 = bitcast float* %a.gep2 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
|
||||
; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
|
||||
; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
|
||||
; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
|
||||
; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
|
||||
%a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
|
||||
%a.cast3 = bitcast float* %a.gep3 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
|
||||
; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
|
||||
; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
|
||||
; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3
|
||||
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
|
||||
; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
|
||||
; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
|
||||
; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
|
||||
|
||||
%ret = load <4 x float>* %a
|
||||
|
||||
ret <4 x float> %ret
|
||||
; CHECK-NEXT: ret <4 x float> %[[insert_f]]
|
||||
}
|
||||
|
||||
define i32 @PR14212() {
|
||||
; CHECK: @PR14212
|
||||
; This caused a crash when "splitting" the load of the i32 in order to promote
|
||||
|
Loading…
x
Reference in New Issue
Block a user