Fix PPC ABI for ByVal structs with vector members

When a structure is passed by value, and that structure contains a vector
member, according to the PPC ABI, the structure will receive enhanced alignment
(so that the vector within the structure will always be aligned).

This should resolve PR16641.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190636 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2013-09-12 23:20:06 +00:00
parent 405b6503b3
commit 6671cd4db0
2 changed files with 113 additions and 9 deletions

View File

@ -578,24 +578,48 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} }
} }
/// getMaxByValAlign - Helper for getByValTypeAlignment. Inspects \p Ty and
/// raises \p MaxAlign when a sufficiently wide vector is found, looking
/// through array element types and struct members recursively.
///
/// A vector of 256 bits or more sets the alignment to 32 when \p MaxMaxAlign
/// allows it; otherwise a vector of 128 bits or more raises it to 16. Once
/// \p MaxAlign equals \p MaxMaxAlign the search stops early.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  // Already at the cap: no member can raise the alignment any further.
  if (MaxAlign == MaxMaxAlign)
    return;

  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
    const unsigned Bits = VecTy->getBitWidth();
    if (MaxMaxAlign >= 32 && Bits >= 256) {
      MaxAlign = 32;
      return;
    }
    if (Bits >= 128 && MaxAlign < 16)
      MaxAlign = 16;
    return;
  }

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    // Every element has the same type, so one query of it is enough.
    unsigned ElemAlign = 0;
    getMaxByValAlign(ArrTy->getElementType(), ElemAlign, MaxMaxAlign);
    if (MaxAlign < ElemAlign)
      MaxAlign = ElemAlign;
    return;
  }

  StructType *StructTy = dyn_cast<StructType>(Ty);
  if (!StructTy)
    return;

  // Take the largest requirement over all members, stopping at the cap.
  const unsigned NumFields = StructTy->getNumElements();
  for (unsigned Idx = 0; Idx < NumFields; ++Idx) {
    unsigned FieldAlign = 0;
    getMaxByValAlign(StructTy->getElementType(Idx), FieldAlign, MaxMaxAlign);
    if (MaxAlign < FieldAlign)
      MaxAlign = FieldAlign;
    if (MaxAlign == MaxMaxAlign)
      return;
  }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. /// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
const TargetMachine &TM = getTargetMachine(); const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary. // Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin()) if (PPCSubTarget.isDarwin())
return 4; return 4;
// 16byte and wider vectors are passed on 16byte boundary. // 16byte and wider vectors are passed on 16byte boundary.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
if (VTy->getBitWidth() >= 128)
return 16;
// The rest is 8 on PPC64 and 4 on PPC32 boundary. // The rest is 8 on PPC64 and 4 on PPC32 boundary.
if (PPCSubTarget.isPPC64()) unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
return 8; if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
return 4; return Align;
} }
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@ -2281,6 +2305,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(FIN); InVals.push_back(FIN);
continue; continue;
} }
unsigned BVAlign = Flags.getByValAlign();
if (BVAlign > 8) {
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
CurArgOffset = ArgOffset;
}
// All aggregates smaller than 8 bytes must be passed right-justified. // All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize) if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@ -3870,6 +3901,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0) if (Size == 0)
continue; continue;
unsigned BVAlign = Flags.getByValAlign();
if (BVAlign > 8) {
if (BVAlign % PtrByteSize != 0)
llvm_unreachable(
"ByVal alignment is not a multiple of the pointer size");
ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
}
// All aggregates smaller than 8 bytes must be passed right-justified. // All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) { if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);

View File

@ -0,0 +1,64 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
%struct.s2 = type { i64, <4 x float> }
@ve = external global <4 x float>
@n = external global i64
; Function Attrs: nounwind
; test1: nine i64 arguments precede a <4 x float> inreg argument, which is
; stored to the global @ve. The CHECK lines expect that store to be emitted
; as an stvx whose first operand is 2, followed by the return (blr).
define void @test1(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, <4 x float> inreg %vs.coerce) #0 {
entry:
store <4 x float> %vs.coerce, <4 x float>* @ve, align 16, !tbaa !0
ret void
; CHECK-LABEL: @test1
; CHECK: stvx 2,
; CHECK: blr
}
; Function Attrs: nounwind
; test2: eight i64 arguments precede a byval %struct.s2 ({ i64, <4 x float> }).
; The i64 member is copied to @n and the vector member to @ve.
; The CHECK lines expect the i64 member to be loaded from offset 112 off
; register 1, and the vector member to be loaded with lvx from base 112 off
; register 1 plus an index register holding 16.
define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
entry:
%m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
%0 = load i64* %m, align 8, !tbaa !2
store i64 %0, i64* @n, align 8, !tbaa !2
%v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
%1 = load <4 x float>* %v, align 16, !tbaa !0
store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
ret void
; CHECK-LABEL: @test2
; CHECK: ld {{[0-9]+}}, 112(1)
; CHECK: li [[REG16:[0-9]+]], 16
; CHECK: addi [[REGB:[0-9]+]], 1, 112
; CHECK: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
}
; Function Attrs: nounwind
; test3: same body as test2, but with nine i64 arguments ahead of the byval
; %struct.s2, i.e. one more 8-byte argument than in test2.
; The CHECK lines expect the struct at offset 128 off register 1 (16 bytes
; past test2's 112, not 8), so the struct still starts on a multiple of 16.
; The vector member is again loaded with lvx at that base plus 16.
define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
entry:
%m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
%0 = load i64* %m, align 8, !tbaa !2
store i64 %0, i64* @n, align 8, !tbaa !2
%v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
%1 = load <4 x float>* %v, align 16, !tbaa !0
store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
ret void
; CHECK-LABEL: @test3
; CHECK: ld {{[0-9]+}}, 128(1)
; CHECK: li [[REG16:[0-9]+]], 16
; CHECK: addi [[REGB:[0-9]+]], 1, 128
; CHECK: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
}
attributes #0 = { nounwind }
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA"}
!2 = metadata !{metadata !"long", metadata !0}