[PowerPC] Fix on-stack AltiVec arguments with 64-bit SVR4

The current 64-bit SVR4 code still contains remnants of Darwin code in its
AltiVec argument handling.  As a result, AltiVec arguments (or subsequent
arguments) were in some cases not placed correctly in the parameter area.

The correct behaviour with the 64-bit SVR4 ABI is:
- All AltiVec arguments take up space in the parameter area, just like
  any other arguments, whether vararg or not.
- They are always 16-byte aligned, skipping a parameter area doubleword
  (and the associated GPR, if any), if necessary.

This patch implements the correct behaviour and adds a test case.
(Verified against GCC behaviour via the ABI compat test suite.)
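
To make the placement rule concrete, here is a minimal standalone sketch
(illustrative only, not part of the patch) that walks the parameter save area
for the argument list used in the new test case.  It assumes the ELFv1
convention that the parameter save area starts 48 bytes above the stack
pointer; all names in it are made up for the example.

  #include <cstdio>

  struct ArgInfo { unsigned Size, Align; };

  int main() {
    // i64 %a, <4 x i32> %b, i64 %c, <4 x i32> %d, i64 %e  (as in the test)
    const ArgInfo Args[] = { {8, 8}, {16, 16}, {8, 8}, {16, 16}, {8, 8} };
    const unsigned PtrByteSize = 8;

    // Assumed ELFv1 layout: the parameter save area begins at 48(r1).
    unsigned ArgOffset = 48;
    for (const ArgInfo &A : Args) {
      // Vector arguments are 16-byte aligned, skipping a doubleword (and the
      // associated GPR, not modelled here) if necessary.
      while (ArgOffset % A.Align != 0)
        ArgOffset += PtrByteSize;
      std::printf("argument at offset %u\n", ArgOffset);
      // Every argument reserves its own slot in the parameter area.
      ArgOffset += A.Size;
    }
    // Prints offsets 48, 64, 80, 96, 112; the last one matches the 112(1)
    // accesses checked in the test case below.
    return 0;
  }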



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211492 91177308-0d34-0410-b5e6-96231b3b80d8
Ulrich Weigand 2014-06-23 12:36:34 +00:00
parent 6f7e87c751
commit fdb6eb65c7
2 changed files with 54 additions and 44 deletions


@@ -2475,7 +2475,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
   // although the first ones are often in registers.
   SmallVector<SDValue, 8> MemOps;
-  unsigned nAltivecParamsAtEnd = 0;
   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
@@ -2490,22 +2489,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     unsigned CurArgOffset = ArgOffset;
 
-    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+    // Altivec parameters are padded to a 16 byte boundary.
     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
-        ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
-      if (isVarArg) {
-        MinReservedArea = ((MinReservedArea+15)/16)*16;
-        MinReservedArea += CalculateStackSlotSize(ObjectVT,
-                                                  Flags,
-                                                  PtrByteSize);
-      } else
-        nAltivecParamsAtEnd++;
-    } else
-      // Calculate min reserved area.
-      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
-                                                Flags,
-                                                PtrByteSize);
+        ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64)
+      MinReservedArea = ((MinReservedArea+15)/16)*16;
+
+    // Calculate min reserved area.
+    MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize);
 
     // FIXME the codegen can be much improved in some cases.
     // We do not have to keep everything in memory.
@@ -2654,30 +2645,24 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
-      // Note that vector arguments in registers don't reserve stack space,
-      // except in varargs functions.
-      if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
-          MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
-          MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
-        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
-        if (isVarArg) {
-          while ((ArgOffset % 16) != 0) {
-            ArgOffset += PtrByteSize;
-            if (GPR_idx != Num_GPR_Regs)
-              GPR_idx++;
-          }
-          ArgOffset += 16;
-          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
-        }
+      // Vectors are aligned to a 16-byte boundary in the argument save area.
+      while ((ArgOffset % 16) != 0) {
+        ArgOffset += PtrByteSize;
+        if (GPR_idx != Num_GPR_Regs)
+          GPR_idx++;
+      }
+      if (VR_idx != Num_VR_Regs) {
+        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+          MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+          MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++VR_idx;
       } else {
-        // Vectors are aligned.
-        ArgOffset = ((ArgOffset+15)/16)*16;
         CurArgOffset = ArgOffset;
-        ArgOffset += 16;
         needsLoad = true;
       }
+      ArgOffset += 16;
+      GPR_idx = std::min(GPR_idx + 2, Num_GPR_Regs);
       break;
     }
@@ -2699,7 +2684,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
   // call optimized functions' reserved stack space needs to be aligned so that
   // taking the difference between two stack areas will result in an aligned
   // stack.
-  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+  setMinReservedArea(MF, DAG, 0, MinReservedArea, true);
 
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
@@ -4326,17 +4311,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
-      if (isVarArg) {
-        // These go aligned on the stack, or in the corresponding R registers
-        // when within range. The Darwin PPC ABI doc claims they also go in
-        // V registers; in fact gcc does this only for arguments that are
-        // prototyped, not for those that match the ... We do it for all
-        // arguments, seems to work.
-        while (ArgOffset % 16 !=0) {
-          ArgOffset += PtrByteSize;
-          if (GPR_idx != NumGPRs)
-            GPR_idx++;
-        }
+      // Vectors are aligned to a 16-byte boundary in the argument save area.
+      while (ArgOffset % 16 !=0) {
+        ArgOffset += PtrByteSize;
+        if (GPR_idx != NumGPRs)
+          GPR_idx++;
+      }
+
+      // For a varargs call, named arguments go into VRs or on the stack as
+      // usual; unnamed arguments always go to the stack or the corresponding
+      // GPRs when within range. For now, we always put the value in both
+      // locations (or even all three).
+      if (isVarArg) {
         // We could elide this store in the case where the object fits
         // entirely in R registers. Maybe later.
         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
@@ -4371,10 +4357,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
           break;
       }
 
-      // Non-varargs Altivec params generally go in registers, but have
-      // stack space allocated at the end.
+      // Non-varargs Altivec params go into VRs or on the stack.
       if (VR_idx != NumVRs) {
-        // Doesn't have GPR space allocated.
         unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                          Arg.getSimpleValueType() == MVT::v2i64) ?
                         VSRH[VR_idx] : VR[VR_idx];
@@ -4385,8 +4369,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                          true, isTailCall, true, MemOpChains,
                          TailCallArguments, dl);
-        ArgOffset += 16;
       }
+      ArgOffset += 16;
+      GPR_idx = std::min(GPR_idx + 2, NumGPRs);
       break;
     }
   }


@@ -0,0 +1,25 @@
; RUN: llc < %s -march=ppc64 -mattr=+altivec | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Verify that in the 64-bit Linux ABI, vector arguments take up space
; in the parameter save area.
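; Expected layout, assuming the parameter save area starts at 48(1) (an ELFv1
; assumption added for this note, not stated by the patch): %a at 48,
; %b aligned up to 64, %c at 80, %d aligned up to 96, %e at 112 -- hence the
; 112(1) accesses checked below.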
define i64 @callee(i64 %a, <4 x i32> %b, i64 %c, <4 x i32> %d, i64 %e) {
entry:
ret i64 %e
}
; CHECK-LABEL: callee:
; CHECK: ld 3, 112(1)
define void @caller(i64 %x, <4 x i32> %y) {
entry:
tail call void @test(i64 %x, <4 x i32> %y, i64 %x, <4 x i32> %y, i64 %x)
ret void
}
; CHECK-LABEL: caller:
; CHECK: std 3, 112(1)
declare void @test(i64, <4 x i32>, i64, <4 x i32>, i64)