mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-12 18:33:22 +00:00
Convert the PPC backend to use the new FMA infrastructure.
The existing contraction patterns are replaced with fma/fneg. Overall functionality should be the same.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158955 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cd88efe516
commit
070b8dba80
@ -132,12 +132,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
||||
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FPOW , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f64, Legal);
|
||||
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FPOW , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f32, Legal);
|
||||
|
||||
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
|
||||
|
||||
@ -378,6 +378,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
||||
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
|
||||
@ -5876,6 +5877,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
}
|
||||
}
|
||||
|
||||
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
|
||||
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
|
||||
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
|
||||
/// is expanded to mul + add.
|
||||
bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
|
||||
if (!VT.isSimple())
|
||||
return false;
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
case MVT::v4f32:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
|
||||
if (DisableILPPref)
|
||||
return TargetLowering::getSchedulingPreference(N);
|
||||
|
@ -366,6 +366,12 @@ namespace llvm {
|
||||
bool IsZeroVal, bool MemcpyStrSrc,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
|
||||
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
|
||||
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
|
||||
/// is expanded to mul + add.
|
||||
virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
|
||||
|
||||
private:
|
||||
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
|
||||
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
|
||||
|
@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations.
|
||||
// VA-Form instructions. 3-input AltiVec ops.
|
||||
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
|
||||
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
|
||||
[(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
|
||||
VRRC:$vB))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
|
||||
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
|
||||
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
|
||||
[(set VRRC:$vD, (fsub V_immneg0,
|
||||
(fsub (fmul VRRC:$vA, VRRC:$vC),
|
||||
VRRC:$vB)))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
|
||||
(fneg VRRC:$vB))))]>;
|
||||
|
||||
def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
|
||||
def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
|
||||
|
@ -353,7 +353,6 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Instruction Predicate Definitions.
|
||||
def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
|
||||
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
|
||||
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
|
||||
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
|
||||
@ -1312,51 +1311,43 @@ let Uses = [RM] in {
|
||||
def FMADD : AForm_1<63, 29,
|
||||
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||
[(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
|
||||
F8RC:$FRB))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F8RC:$FRT,
|
||||
(fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
|
||||
def FMADDS : AForm_1<59, 29,
|
||||
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||
[(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
|
||||
F4RC:$FRB))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F4RC:$FRT,
|
||||
(fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
|
||||
def FMSUB : AForm_1<63, 28,
|
||||
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||
[(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
|
||||
F8RC:$FRB))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F8RC:$FRT,
|
||||
(fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
|
||||
def FMSUBS : AForm_1<59, 28,
|
||||
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||
[(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
|
||||
F4RC:$FRB))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F4RC:$FRT,
|
||||
(fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
|
||||
def FNMADD : AForm_1<63, 31,
|
||||
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||
[(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
|
||||
F8RC:$FRB)))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F8RC:$FRT,
|
||||
(fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
|
||||
def FNMADDS : AForm_1<59, 31,
|
||||
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||
[(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
|
||||
F4RC:$FRB)))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F4RC:$FRT,
|
||||
(fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
|
||||
def FNMSUB : AForm_1<63, 30,
|
||||
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||
[(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
|
||||
F8RC:$FRB)))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
|
||||
(fneg F8RC:$FRB))))]>;
|
||||
def FNMSUBS : AForm_1<59, 30,
|
||||
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||
[(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
|
||||
F4RC:$FRB)))]>,
|
||||
Requires<[FPContractions]>;
|
||||
[(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
|
||||
(fneg F4RC:$FRB))))]>;
|
||||
}
|
||||
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
|
||||
// having 4 of these, force the comparison to always be an 8-byte double (code
|
||||
@ -1517,14 +1508,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
|
||||
def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
|
||||
(ADDIS GPRC:$in, tblockaddress:$g)>;
|
||||
|
||||
// Fused negative multiply subtract, alternate pattern
|
||||
def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
|
||||
(FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
|
||||
Requires<[FPContractions]>;
|
||||
def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
|
||||
(FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
|
||||
Requires<[FPContractions]>;
|
||||
|
||||
// Standard shifts. These are represented separately from the real shifts above
|
||||
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
|
||||
// amounts.
|
||||
|
Loading…
x
Reference in New Issue
Block a user