mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-05 14:34:55 +00:00
When possible, custom lower 32-bit SINT_TO_FP to this:

_foo2:
        extsw r2, r3
        std r2, -8(r1)
        lfd f0, -8(r1)
        fcfid f0, f0
        frsp f1, f0
        blr

instead of this:

_foo2:
        lis r2, ha16(LCPI2_0)
        lis r4, 17200
        xoris r3, r3, 32768
        stw r3, -4(r1)
        stw r4, -8(r1)
        lfs f0, lo16(LCPI2_0)(r2)
        lfd f1, -8(r1)
        fsub f0, f1, f0
        frsp f1, f0
        blr

This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s with llcbeta (16.7% and 38.1% respectively).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26943 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e5ba580ab0
commit
ecfe55e65b
@ -245,8 +245,9 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) {
|
||||
case PPC::STFIWX:
|
||||
ThisStoreSize = 4;
|
||||
break;
|
||||
case PPC::STD_32:
|
||||
case PPC::STDX_32:
|
||||
case PPC::STD:
|
||||
case PPC::STDU:
|
||||
case PPC::STFD:
|
||||
case PPC::STFDX:
|
||||
case PPC::STDX:
|
||||
|
@ -140,6 +140,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
||||
// They also have instructions for converting between i64 and fp.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
|
||||
// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
|
||||
} else {
|
||||
@ -222,6 +223,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case PPCISD::SRL: return "PPCISD::SRL";
|
||||
case PPCISD::SRA: return "PPCISD::SRA";
|
||||
case PPCISD::SHL: return "PPCISD::SHL";
|
||||
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
|
||||
case PPCISD::STD_32: return "PPCISD::STD_32";
|
||||
case PPCISD::CALL: return "PPCISD::CALL";
|
||||
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
||||
}
|
||||
@ -302,15 +305,41 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
|
||||
return Bits;
|
||||
}
|
||||
case ISD::SINT_TO_FP: {
|
||||
assert(MVT::i64 == Op.getOperand(0).getValueType() &&
|
||||
"Unhandled SINT_TO_FP type in custom expander!");
|
||||
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
|
||||
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
|
||||
if (MVT::f32 == Op.getValueType())
|
||||
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
||||
return FP;
|
||||
}
|
||||
case ISD::SINT_TO_FP:
|
||||
if (Op.getOperand(0).getValueType() == MVT::i64) {
|
||||
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
|
||||
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
|
||||
if (Op.getValueType() == MVT::f32)
|
||||
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
||||
return FP;
|
||||
} else {
|
||||
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
|
||||
"Unhandled SINT_TO_FP type in custom expander!");
|
||||
// Since we only generate this in 64-bit mode, we can take advantage of
|
||||
// 64-bit registers. In particular, sign extend the input value into the
|
||||
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
|
||||
// then lfd it and fcfid it.
|
||||
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
|
||||
int FrameIdx = FrameInfo->CreateStackObject(8, 8);
|
||||
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
|
||||
|
||||
SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
|
||||
Op.getOperand(0));
|
||||
|
||||
// STD the extended value into the stack slot.
|
||||
SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
|
||||
DAG.getEntryNode(), Ext64, FIdx,
|
||||
DAG.getSrcValue(NULL));
|
||||
// Load the value as a double.
|
||||
SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
|
||||
|
||||
// FCFID it and return it.
|
||||
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
|
||||
if (Op.getValueType() == MVT::f32)
|
||||
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
||||
return FP;
|
||||
}
|
||||
|
||||
case ISD::SELECT_CC: {
|
||||
// Turn FP only select_cc's into fsel instructions.
|
||||
if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
|
||||
@ -1106,27 +1135,30 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
default: break;
|
||||
case ISD::SINT_TO_FP:
|
||||
if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
|
||||
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
|
||||
// We allow the src/dst to be either f32/f64, but force the intermediate
|
||||
// type to be i64.
|
||||
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT &&
|
||||
N->getOperand(0).getValueType() == MVT::i64) {
|
||||
|
||||
SDOperand Val = N->getOperand(0).getOperand(0);
|
||||
if (Val.getValueType() == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
|
||||
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
|
||||
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
|
||||
// We allow the src/dst to be either f32/f64, but the intermediate
|
||||
// type must be i64.
|
||||
if (N->getOperand(0).getValueType() == MVT::i64) {
|
||||
SDOperand Val = N->getOperand(0).getOperand(0);
|
||||
if (Val.getValueType() == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
}
|
||||
|
||||
Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
}
|
||||
|
||||
Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
if (N->getValueType(0) == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
|
||||
Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
if (N->getValueType(0) == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
}
|
||||
return Val;
|
||||
} else if (N->getOperand(0).getValueType() == MVT::i32) {
|
||||
// If the intermediate type is i32, we can avoid the load/store here
|
||||
// too.
|
||||
}
|
||||
return Val;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -75,7 +75,14 @@ namespace llvm {
|
||||
/// shift amounts. These nodes are generated by the multi-precision shift
|
||||
/// code.
|
||||
SRL, SRA, SHL,
|
||||
|
||||
/// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
|
||||
/// registers.
|
||||
EXTSW_32,
|
||||
|
||||
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
|
||||
STD_32,
|
||||
|
||||
/// CALL - A function call.
|
||||
CALL,
|
||||
|
||||
|
@ -58,6 +58,9 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDT_PPCShiftOp>;
|
||||
def PPCsra : SDNode<"PPCISD::SRA" , SDT_PPCShiftOp>;
|
||||
def PPCshl : SDNode<"PPCISD::SHL" , SDT_PPCShiftOp>;
|
||||
|
||||
def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
|
||||
def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, [SDNPHasChain]>;
|
||||
|
||||
// These are target-independent nodes, but have target-specific formats.
|
||||
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,[SDNPHasChain]>;
|
||||
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeq,[SDNPHasChain]>;
|
||||
@ -194,11 +197,17 @@ def memrr : Operand<i32> {
|
||||
let NumMIOperands = 2;
|
||||
let MIOperandInfo = (ops GPRC, GPRC);
|
||||
}
|
||||
def memrix : Operand<i32> { // memri where the imm is shifted 2 bits.
|
||||
let PrintMethod = "printMemRegImmShifted";
|
||||
let NumMIOperands = 2;
|
||||
let MIOperandInfo = (ops i32imm, GPRC);
|
||||
}
|
||||
|
||||
// Define PowerPC specific addressing mode.
|
||||
def iaddr : ComplexPattern<i32, 2, "SelectAddrImm", []>;
|
||||
def xaddr : ComplexPattern<i32, 2, "SelectAddrIdx", []>;
|
||||
def xoaddr : ComplexPattern<i32, 2, "SelectAddrIdxOnly",[]>;
|
||||
def ixaddr : ComplexPattern<i32, 2, "SelectAddrImmShift", []>; // "std"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Instruction Predicate Definitions.
|
||||
@ -428,9 +437,15 @@ let isStore = 1, noResults = 1, PPC970_Unit = 2 in {
|
||||
def STD : DSForm_2<62, 0, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA),
|
||||
"std $rT, $DS($rA)", LdStSTD,
|
||||
[]>, isPPC64;
|
||||
def STDU : DSForm_2<62, 1, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA),
|
||||
"stdu $rT, $DS($rA)", LdStSTD,
|
||||
[]>, isPPC64;
|
||||
|
||||
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
|
||||
def STD_32 : DSForm_2<62, 0, (ops GPRC:$rT, memrix:$dst),
|
||||
"std $rT, $dst", LdStSTD,
|
||||
[(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
|
||||
def STDX_32 : XForm_8<31, 149, (ops GPRC:$rT, memrr:$dst),
|
||||
"stdx $rT, $dst", LdStSTD,
|
||||
[(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
}
|
||||
|
||||
// X-Form instructions. Most instructions that perform an operation on a
|
||||
@ -586,6 +601,11 @@ def EXTSH : XForm_11<31, 922, (ops GPRC:$rA, GPRC:$rS),
|
||||
def EXTSW : XForm_11<31, 986, (ops G8RC:$rA, G8RC:$rS),
|
||||
"extsw $rA, $rS", IntGeneral,
|
||||
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
|
||||
/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
|
||||
def EXTSW_32 : XForm_11<31, 986, (ops GPRC:$rA, GPRC:$rS),
|
||||
"extsw $rA, $rS", IntGeneral,
|
||||
[(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
|
||||
|
||||
def CMP : XForm_16<31, 0, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB),
|
||||
"cmp $crD, $long, $rA, $rB", IntCompare>;
|
||||
def CMPL : XForm_16<31, 32, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB),
|
||||
|
@ -276,7 +276,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const {
|
||||
case PPC::LWA:
|
||||
case PPC::LD:
|
||||
case PPC::STD:
|
||||
case PPC::STDU:
|
||||
case PPC::STD_32:
|
||||
assert((Offset & 3) == 0 && "Invalid frame offset!");
|
||||
Offset >>= 2; // The actual encoded value has the low two bits zero.
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user