[mips] Enhance command line option "-mno-ldc1-sdc1" to expand base+index double

precision loads and stores as well as reg+imm double precision loads and stores.

Previously, expansion of loads and stores was done after register allocation,
but now it takes place during legalization. As a result, users will see double
precision stores and loads being emitted to spill and restore 64-bit FP registers.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190235 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Akira Hatanaka 2013-09-07 00:52:30 +00:00
parent d65d2fde4e
commit 3e6758541b
7 changed files with 147 additions and 89 deletions

View File

@ -329,6 +329,9 @@ namespace llvm {
SmallVector<ByValArgInfo, 2> ByValArgs;
};
protected:
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
// Subtarget Info
const MipsSubtarget *Subtarget;
@ -366,8 +369,6 @@ namespace llvm {
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;

View File

@ -368,12 +368,8 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
}
let Predicates = [NotFP64bit, HasStdEnc] in {
let isPseudo = 1, isCodeGenOnly = 1 in {
def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, load>;
def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>;
}
def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad>, LW_FM<0x35>;
def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore>, LW_FM<0x3d>;
def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
}
// Indexed loads and stores.
@ -595,7 +591,7 @@ let AddedComplexity = 40 in {
}
let Predicates = [NotFP64bit, HasStdEnc] in {
def : LoadRegImmPat<PseudoLDC1, f64, load>;
def : StoreRegImmPat<PseudoSDC1, f64>;
def : LoadRegImmPat<LDC1, f64, load>;
def : StoreRegImmPat<SDC1, f64>;
}
}

View File

@ -25,6 +25,11 @@ static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
cl::desc("MIPS: Enable tail calls."), cl::init(false));
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
cl::desc("Expand double precision loads and "
"stores to their single precision "
"counterparts"));
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
// Set up the register classes
@ -129,6 +134,11 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (NoDPLoadStore) {
setOperationAction(ISD::LOAD, MVT::f64, Custom);
setOperationAction(ISD::STORE, MVT::f64, Custom);
}
computeRegisterProperties();
}
@ -168,6 +178,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch(Op.getOpcode()) {
case ISD::LOAD: return lowerLOAD(Op, DAG);
case ISD::STORE: return lowerSTORE(Op, DAG);
case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@ -611,6 +623,68 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
InternalLinkage, CLI, Callee, Chain);
}
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode &Nd = *cast<LoadSDNode>(Op);
if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
return MipsTargetLowering::lowerLOAD(Op, DAG);
// Replace a double precision load with two i32 loads and a buildpair64.
SDLoc DL(Op);
SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
EVT PtrVT = Ptr.getValueType();
// i32 load from lower address.
SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
MachinePointerInfo(), Nd.isVolatile(),
Nd.isNonTemporal(), Nd.isInvariant(),
Nd.getAlignment());
// i32 load from higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
MachinePointerInfo(), Nd.isVolatile(),
Nd.isNonTemporal(), Nd.isInvariant(),
Nd.getAlignment());
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
SDValue Ops[2] = {BP, Hi.getValue(1)};
return DAG.getMergeValues(Ops, 2, DL);
}
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode &Nd = *cast<StoreSDNode>(Op);
if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
return MipsTargetLowering::lowerSTORE(Op, DAG);
// Replace a double precision store with two extractelement64s and i32 stores.
SDLoc DL(Op);
SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
EVT PtrVT = Ptr.getValueType();
SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Val, DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Val, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
// i32 store to lower address.
Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
Nd.getTBAAInfo());
// i32 store to higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
Nd.getTBAAInfo());
}
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {

View File

@ -58,6 +58,9 @@ namespace llvm {
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
SelectionDAG &DAG) const;

View File

@ -24,11 +24,6 @@
using namespace llvm;
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
cl::desc("Expand double precision loads and "
"stores to their single precision "
"counterparts."));
MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm,
tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
@ -294,12 +289,6 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::ExtractElementF64_64:
expandExtractElementF64(MBB, MI, true);
break;
case Mips::PseudoLDC1:
expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
break;
case Mips::PseudoSDC1:
expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
break;
case Mips::MIPSeh_return32:
case Mips::MIPSeh_return64:
expandEhReturn(MBB, MI);
@ -484,56 +473,6 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
.addReg(HiReg);
}
/// Add 4 to the displacement of operand MO.
static void fixDisp(MachineOperand &MO) {
switch (MO.getType()) {
default:
llvm_unreachable("Unhandled operand type.");
case MachineOperand::MO_Immediate:
MO.setImm(MO.getImm() + 4);
break;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_BlockAddress:
case MachineOperand::MO_TargetIndex:
case MachineOperand::MO_ExternalSymbol:
MO.setOffset(MO.getOffset() + 4);
break;
}
}
void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned OpcD, unsigned OpcS) const {
// If NoDPLoadStore is false, just change the opcode.
if (!NoDPLoadStore) {
genInstrWithNewOpc(OpcD, I);
return;
}
// Expand a double precision FP load or store to two single precision
// instructions.
const TargetRegisterInfo &TRI = getRegisterInfo();
const MachineOperand &ValReg = I->getOperand(0);
unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_lo);
unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_hi);
if (!TM.getSubtarget<MipsSubtarget>().isLittle())
std::swap(LoReg, HiReg);
// Create an instruction which loads from or stores to the lower memory
// address.
MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I);
MIB->getOperand(0).setReg(LoReg);
// Create an instruction which loads from or stores to the higher memory
// address.
MIB = genInstrWithNewOpc(OpcS, I);
MIB->getOperand(0).setReg(HiReg);
fixDisp(MIB->getOperand(2));
}
void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
// This pseudo instruction is generated as part of the lowering of

View File

@ -104,9 +104,6 @@ private:
MachineBasicBlock::iterator I, bool FP64) const;
void expandBuildPairF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, bool FP64) const;
void expandDPLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, unsigned OpcD,
unsigned OpcS) const;
void expandEhReturn(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
};

View File

@ -1,22 +1,31 @@
; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \
; RUN: FileCheck %s -check-prefix=LE-PIC
; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
; RUN: FileCheck %s -check-prefix=LE-STATIC
; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
; RUN: FileCheck %s -check-prefix=BE-PIC
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK-LDC1-SDC1
; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1
@g0 = common global double 0.000000e+00, align 8
; LE-PIC-LABEL: test_ldc1:
; LE-PIC: lwc1 $f0, 0(${{[0-9]+}})
; LE-PIC: lwc1 $f1, 4(${{[0-9]+}})
; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
; LE-PIC-DAG: mtc1 $[[R0]], $f0
; LE-PIC-DAG: mtc1 $[[R1]], $f1
; LE-STATIC-LABEL: test_ldc1:
; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}})
; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}})
; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
; LE-STATIC-DAG: mtc1 $[[R1]], $f0
; LE-STATIC-DAG: mtc1 $[[R3]], $f1
; BE-PIC-LABEL: test_ldc1:
; BE-PIC: lwc1 $f1, 0(${{[0-9]+}})
; BE-PIC: lwc1 $f0, 4(${{[0-9]+}})
; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
; BE-PIC-DAG: mtc1 $[[R1]], $f0
; BE-PIC-DAG: mtc1 $[[R0]], $f1
; CHECK-LDC1-SDC1-LABEL: test_ldc1:
; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
@ -27,14 +36,22 @@ entry:
}
; LE-PIC-LABEL: test_sdc1:
; LE-PIC: swc1 $f12, 0(${{[0-9]+}})
; LE-PIC: swc1 $f13, 4(${{[0-9]+}})
; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
; LE-STATIC-LABEL: test_sdc1:
; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}})
; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}})
; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
; BE-PIC-LABEL: test_sdc1:
; BE-PIC: swc1 $f13, 0(${{[0-9]+}})
; BE-PIC: swc1 $f12, 4(${{[0-9]+}})
; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
; CHECK-LDC1-SDC1-LABEL: test_sdc1:
; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
@ -43,3 +60,34 @@ entry:
store double %a, double* @g0, align 8
ret void
}
; CHECK-LE-PIC-DAG-LABEL: test_ldxc1:
; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
; LE-PIC-DAG: mtc1 $[[R0]], $f0
; LE-PIC-DAG: mtc1 $[[R1]], $f1
; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
entry:
%arrayidx = getelementptr inbounds double* %a, i32 %i
%0 = load double* %arrayidx, align 8
ret double %0
}
; LE-PIC-DAG-LABEL: test_sdxc1:
; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
entry:
%arrayidx = getelementptr inbounds double* %a, i32 %i
store double %b, double* %arrayidx, align 8
ret void
}