PPC: Allocate RS spill slot for unaligned i64 load/store

This fixes another bug found by llvm-stress!

If we happen to be doing an i64 load or store into a stack slot that has less
than a 4-byte alignment, then the frame-index elimination may need to use an
indexed load or store instruction (because the offset may not be a multiple of
4, a requirement of the STD/LD instructions). The extra register needed to hold
the offset comes from the register scavenger, and it is possible that the
scavenger will need to use an emergency spill slot. As a result, we need to
make sure that a spill slot is allocated when doing an i64 load/store into a
less-than-4-byte-aligned stack slot.

Because test cases for things like this tend to be fairly fragile, I've
concatenated a few small bugpoint-reduced test cases together to form the
regression test.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185907 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2013-07-09 06:34:51 +00:00
parent be6b9101d4
commit fa55969acb
2 changed files with 152 additions and 2 deletions

View File

@ -1030,6 +1030,35 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
return false;
}
// An i64 load or store that targets a stack slot aligned to fewer than 4 bytes
// may force frame-index elimination to fall back to an indexed load or store
// instruction, because the resulting offset may not be a multiple of 4. The
// register that holds that offset is obtained from the register scavenger,
// which in turn may need an emergency spill slot. This helper records on the
// function's PPCFunctionInfo that such a slot must be allocated whenever an
// i64 access to a less-than-4-byte-aligned stack slot is selected.
//
// DAG      - the selection DAG; used only to reach the MachineFunction.
// FrameIdx - the frame index being addressed.
// VT       - the value type associated with the address being selected.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  //
  // Only i64 accesses are of interest. Negative frame indices are skipped as
  // well: they come from argument lowering, where the ABI should guarantee
  // the necessary alignment.
  if (VT != MVT::i64 || FrameIdx < 0)
    return;

  MachineFunction &Func = DAG.getMachineFunction();
  const unsigned SlotAlign = Func.getFrameInfo()->getObjectAlignment(FrameIdx);

  // Under-aligned slot: flag the function so a spill slot gets reserved.
  if (SlotAlign < 4)
    Func.getInfo<PPCFunctionInfo>()->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
@ -1051,6 +1080,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
@ -1115,9 +1145,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
Disp = DAG.getTargetConstant(0, getPointerTy());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else
Base = N;
return true; // [r+0]
}

View File

@ -0,0 +1,119 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
target triple = "powerpc64-unknown-linux-gnu"
; First of three concatenated, reduced reproducers in this file.
; The one-byte alloca %A4 is reinterpreted as a ppc_fp128* (%PC10) and a full
; ppc_fp128 value is stored through it in block CF81, so a wide value is
; written to a stack object that was declared as a single i8.
; NOTE(review): presumably this is what produces the under-aligned 64-bit
; frame-index access described in the commit message; confirm against the
; generated code. The i64 loads/stores through undef pointers and the unused
; shufflevector values look like leftovers of test-case reduction.
define void @autogen_SD4932(i8) {
BB:
%A4 = alloca i8
%A = alloca <1 x ppc_fp128>
%Shuff = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 undef, i32 29, i32 31, i32 1, i32 3, i32 5>
br label %CF
CF: ; preds = %CF80, %CF, %BB
%L5 = load i64* undef
store i8 %0, i8* %A4
%Shuff7 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> %Shuff, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26>
; Reinterpret the i8 stack slot as a pointer to a 128-bit floating-point value.
%PC10 = bitcast i8* %A4 to ppc_fp128*
br i1 undef, label %CF, label %CF77
CF77: ; preds = %CF81, %CF83, %CF77, %CF
br i1 undef, label %CF77, label %CF82
CF82: ; preds = %CF82, %CF77
%L19 = load i64* undef
store <1 x ppc_fp128> zeroinitializer, <1 x ppc_fp128>* %A
store i8 -65, i8* %A4
br i1 undef, label %CF82, label %CF83
CF83: ; preds = %CF82
%L34 = load i64* undef
br i1 undef, label %CF77, label %CF81
CF81: ; preds = %CF83
%Shuff43 = shufflevector <16 x i32> %Shuff7, <16 x i32> undef, <16 x i32> <i32 15, i32 17, i32 19, i32 21, i32 23, i32 undef, i32 undef, i32 29, i32 31, i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
; Wide store through the pointer derived from the one-byte alloca.
store ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128* %PC10
br i1 undef, label %CF77, label %CF78
CF78: ; preds = %CF78, %CF81
br i1 undef, label %CF78, label %CF79
CF79: ; preds = %CF79, %CF78
br i1 undef, label %CF79, label %CF80
CF80: ; preds = %CF79
store i64 %L19, i64* undef
%Cmp75 = icmp uge i32 206779, undef
br i1 %Cmp75, label %CF, label %CF76
CF76: ; preds = %CF80
store i64 %L5, i64* undef
store i64 %L34, i64* undef
ret void
}
; Second reduced reproducer.
; The <2 x i1> alloca %A4 is bitcast to i64* (%PC), and block CF80 stores an
; i64 through that pointer, i.e. an i64 store into a stack object that was
; not declared as an i64. The function also keeps a <16 x float> alloca (%A)
; live and stores to it inside the loop.
; NOTE(review): presumably %A4 ends up with less than 4-byte alignment, which
; is the situation the accompanying backend change targets; confirm.
define void @autogen_SD88042(i8*, i32*, i8) {
BB:
%A4 = alloca <2 x i1>
%A = alloca <16 x float>
%L = load i8* %0
%Sl = select i1 false, <16 x float>* %A, <16 x float>* %A
; Reinterpret the <2 x i1> stack slot as an i64 slot.
%PC = bitcast <2 x i1>* %A4 to i64*
%Sl27 = select i1 false, i8 undef, i8 %L
br label %CF
CF: ; preds = %CF78, %CF, %BB
%PC33 = bitcast i32* %1 to i32*
br i1 undef, label %CF, label %CF77
CF77: ; preds = %CF80, %CF77, %CF
store <16 x float> zeroinitializer, <16 x float>* %Sl
%L58 = load i32* %PC33
store i8 0, i8* %0
br i1 undef, label %CF77, label %CF80
CF80: ; preds = %CF77
; The i64 store into the reinterpreted stack slot.
store i64 0, i64* %PC
%E67 = extractelement <8 x i1> zeroinitializer, i32 1
br i1 %E67, label %CF77, label %CF78
CF78: ; preds = %CF80
%Cmp73 = icmp eq i32 189865, %L58
br i1 %Cmp73, label %CF, label %CF76
CF76: ; preds = %CF78
store i8 %2, i8* %0
store i8 %Sl27, i8* %0
ret void
}
; Third reduced reproducer.
; The i1 alloca %A1 is bitcast to i64* (%PC); the function then both loads an
; i64 from it (%L31) and stores i64 values to it in blocks CF74 and CF73, so
; this case exercises i64 loads as well as i64 stores on a stack object that
; was declared as a single i1.
; NOTE(review): presumably %A1 gets less than 4-byte alignment, matching the
; condition checked by the accompanying backend change; confirm.
define void @autogen_SD37497(i8*, i32*, i64*) {
BB:
%A1 = alloca i1
%I8 = insertelement <1 x i32> <i32 -1>, i32 454855, i32 0
%Cmp = icmp ult <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, undef
%L10 = load i64* %2
%E11 = extractelement <4 x i1> %Cmp, i32 2
br label %CF72
CF72: ; preds = %CF74, %CF72, %BB
store double 0xB47BB29A53790718, double* undef
%E18 = extractelement <1 x i32> <i32 -1>, i32 0
%FC22 = sitofp <1 x i32> %I8 to <1 x float>
br i1 undef, label %CF72, label %CF74
CF74: ; preds = %CF72
store i8 0, i8* %0
; Reinterpret the i1 stack slot as an i64 slot, then load and store through it.
%PC = bitcast i1* %A1 to i64*
%L31 = load i64* %PC
store i64 477323, i64* %PC
%Sl37 = select i1 false, i32* undef, i32* %1
%Cmp38 = icmp ugt i1 undef, undef
br i1 %Cmp38, label %CF72, label %CF73
CF73: ; preds = %CF74
; Further i64 stores into the same reinterpreted slot.
store i64 %L31, i64* %PC
%B55 = fdiv <1 x float> undef, %FC22
%Sl63 = select i1 %E11, i32* undef, i32* %Sl37
store i32 %E18, i32* %Sl63
store i64 %L10, i64* %PC
ret void
}