mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Fix PPC ABI for ByVal structs with vector members
When a structure is passed by value, and that structure contains a vector member, according to the PPC ABI, the structure will receive enhanced alignment (so that the vector within the structure will always be aligned). This should resolve PR16641. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190636 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
405b6503b3
commit
6671cd4db0
@ -578,24 +578,48 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
||||
}
|
||||
}
|
||||
|
||||
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
|
||||
/// the desired ByVal argument alignment.
|
||||
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
|
||||
unsigned MaxMaxAlign) {
|
||||
if (MaxAlign == MaxMaxAlign)
|
||||
return;
|
||||
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
|
||||
if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
|
||||
MaxAlign = 32;
|
||||
else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
|
||||
MaxAlign = 16;
|
||||
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
|
||||
unsigned EltAlign = 0;
|
||||
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
|
||||
if (EltAlign > MaxAlign)
|
||||
MaxAlign = EltAlign;
|
||||
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
unsigned EltAlign = 0;
|
||||
getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
|
||||
if (EltAlign > MaxAlign)
|
||||
MaxAlign = EltAlign;
|
||||
if (MaxAlign == MaxMaxAlign)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
|
||||
/// function arguments in the caller parameter area.
|
||||
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
|
||||
const TargetMachine &TM = getTargetMachine();
|
||||
// Darwin passes everything on 4 byte boundary.
|
||||
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
|
||||
if (PPCSubTarget.isDarwin())
|
||||
return 4;
|
||||
|
||||
// 16byte and wider vectors are passed on 16byte boundary.
|
||||
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
|
||||
if (VTy->getBitWidth() >= 128)
|
||||
return 16;
|
||||
|
||||
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
|
||||
if (PPCSubTarget.isPPC64())
|
||||
return 8;
|
||||
|
||||
return 4;
|
||||
unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
|
||||
if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
|
||||
getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
|
||||
return Align;
|
||||
}
|
||||
|
||||
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
@ -2281,6 +2305,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
InVals.push_back(FIN);
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned BVAlign = Flags.getByValAlign();
|
||||
if (BVAlign > 8) {
|
||||
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
|
||||
CurArgOffset = ArgOffset;
|
||||
}
|
||||
|
||||
// All aggregates smaller than 8 bytes must be passed right-justified.
|
||||
if (ObjSize < PtrByteSize)
|
||||
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
|
||||
@ -3870,6 +3901,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
if (Size == 0)
|
||||
continue;
|
||||
|
||||
unsigned BVAlign = Flags.getByValAlign();
|
||||
if (BVAlign > 8) {
|
||||
if (BVAlign % PtrByteSize != 0)
|
||||
llvm_unreachable(
|
||||
"ByVal alignment is not a multiple of the pointer size");
|
||||
|
||||
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
|
||||
}
|
||||
|
||||
// All aggregates smaller than 8 bytes must be passed right-justified.
|
||||
if (Size==1 || Size==2 || Size==4) {
|
||||
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
|
||||
|
64
test/CodeGen/PowerPC/vec-abi-align.ll
Normal file
64
test/CodeGen/PowerPC/vec-abi-align.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
%struct.s2 = type { i64, <4 x float> }
|
||||
|
||||
@ve = external global <4 x float>
|
||||
@n = external global i64
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test1: nine i64 arguments followed by a <4 x float> passed directly (marked
; inreg, not byval). The body only stores that vector to the global @ve.
; The FileCheck directives below expect a stvx of vector register 2 and then
; the return (blr).
; NOTE(review): register 2 is presumably where the first vector argument
; arrives under this ABI - confirm against the ABI document.
define void @test1(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, <4 x float> inreg %vs.coerce) #0 {
|
||||
entry:
|
||||
store <4 x float> %vs.coerce, <4 x float>* @ve, align 16, !tbaa !0
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: stvx 2,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test2: eight i64 arguments followed by a byval %struct.s2, which is
; { i64, <4 x float> }. The body copies the i64 member to @n and the vector
; member to @ve.
; The FileCheck directives below expect the i64 member to be loaded from
; offset 112 off register 1, and the vector member to be loaded with lvx from
; base (register 1 + 112) plus index 16, i.e. 16 bytes into the struct.
; Offset 112 is a multiple of 16, so the vector load is 16-byte aligned
; relative to register 1.
define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
|
||||
entry:
|
||||
%m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
|
||||
%0 = load i64* %m, align 8, !tbaa !2
|
||||
store i64 %0, i64* @n, align 8, !tbaa !2
|
||||
%v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
|
||||
%1 = load <4 x float>* %v, align 16, !tbaa !0
|
||||
store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: ld {{[0-9]+}}, 112(1)
|
||||
; CHECK: li [[REG16:[0-9]+]], 16
|
||||
; CHECK: addi [[REGB:[0-9]+]], 1, 112
|
||||
; CHECK: lvx 2, [[REGB]], [[REG16]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test3: same body as test2, but with a ninth i64 argument ahead of the byval
; %struct.s2.
; The FileCheck directives below expect the struct at offset 128 off
; register 1: the i64 member is loaded from 128 and the vector member with
; lvx from base (register 1 + 128) plus index 16.
; NOTE(review): going by test2's offset of 112, one more 8-byte argument would
; put the struct at 120 without extra alignment; 128 is the next multiple of
; 16, which looks like the rounding this test is meant to pin - confirm.
define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
|
||||
entry:
|
||||
%m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
|
||||
%0 = load i64* %m, align 8, !tbaa !2
|
||||
store i64 %0, i64* @n, align 8, !tbaa !2
|
||||
%v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
|
||||
%1 = load <4 x float>* %v, align 16, !tbaa !0
|
||||
store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK: ld {{[0-9]+}}, 128(1)
|
||||
; CHECK: li [[REG16:[0-9]+]], 16
|
||||
; CHECK: addi [[REGB:[0-9]+]], 1, 128
|
||||
; CHECK: lvx 2, [[REGB]], [[REG16]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
!0 = metadata !{metadata !"omnipotent char", metadata !1}
|
||||
!1 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||
!2 = metadata !{metadata !"long", metadata !0}
|
||||
|
Loading…
Reference in New Issue
Block a user