Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-01-16 14:31:59 +00:00)
Custom-lower FMA intrinsics to target-specific nodes and remove the patterns.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162534 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
873cf0a0d7
commit
0e292376d0
@@ -10077,6 +10077,78 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
||||
return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
|
||||
}
|
||||
case Intrinsic::x86_fma_vfmadd_ps:
|
||||
case Intrinsic::x86_fma_vfmadd_pd:
|
||||
case Intrinsic::x86_fma_vfmsub_ps:
|
||||
case Intrinsic::x86_fma_vfmsub_pd:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd:
|
||||
case Intrinsic::x86_fma_vfmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_256: {
|
||||
// Only lower intrinsics if FMA is enabled. FMA4 still uses patterns.
|
||||
if (!Subtarget->hasFMA())
|
||||
return SDValue();
|
||||
|
||||
unsigned Opc;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
case Intrinsic::x86_fma_vfmadd_ps:
|
||||
case Intrinsic::x86_fma_vfmadd_pd:
|
||||
case Intrinsic::x86_fma_vfmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmadd_pd_256:
|
||||
Opc = X86ISD::FMADD;
|
||||
break;
|
||||
case Intrinsic::x86_fma_vfmsub_ps:
|
||||
case Intrinsic::x86_fma_vfmsub_pd:
|
||||
case Intrinsic::x86_fma_vfmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsub_pd_256:
|
||||
Opc = X86ISD::FMSUB;
|
||||
break;
|
||||
case Intrinsic::x86_fma_vfnmadd_ps:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd_256:
|
||||
Opc = X86ISD::FNMADD;
|
||||
break;
|
||||
case Intrinsic::x86_fma_vfnmsub_ps:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd_256:
|
||||
Opc = X86ISD::FNMSUB;
|
||||
break;
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd_256:
|
||||
Opc = X86ISD::FMADDSUB;
|
||||
break;
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_256:
|
||||
Opc = X86ISD::FMSUBADD;
|
||||
break;
|
||||
}
|
||||
|
||||
return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -112,137 +112,6 @@ let ExeDomain = SSEPackedDouble in {
|
||||
v4f64>, VEX_W;
|
||||
}
|
||||
|
||||
let Predicates = [HasFMA] in {
|
||||
def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
|
||||
(memopv4f32 addr:$src3)),
|
||||
(VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
|
||||
(memopv8f32 addr:$src3)),
|
||||
(VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
|
||||
(VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
|
||||
(memopv2f64 addr:$src3)),
|
||||
(VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
|
||||
|
||||
def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
|
||||
(VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
|
||||
(memopv4f64 addr:$src3)),
|
||||
(VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
|
||||
|
||||
} // Predicates = [HasFMA]
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
|
||||
RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
|
||||
|
@@ -205,7 +205,7 @@ public:
|
||||
bool hasAES() const { return HasAES; }
|
||||
bool hasPCLMUL() const { return HasPCLMUL; }
|
||||
bool hasFMA() const { return HasFMA; }
|
||||
// FIXME: Favor FMA when both are enabled. Is this right?
|
||||
// FIXME: Favor FMA when both are enabled. Is this the right thing to do?
|
||||
bool hasFMA4() const { return HasFMA4 && !HasFMA; }
|
||||
bool hasXOP() const { return HasXOP; }
|
||||
bool hasMOVBE() const { return HasMOVBE; }
|
||||
|
Loading…
x
Reference in New Issue
Block a user