mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-08 19:25:47 +00:00
updated patch for the ARM fused multiply add/sub
In this update:
- I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2.
- I kept setting the .fpu=neon-vfpv4 code attribute because that is what the assembler understands.

Patch by Ana Pazos <apazos@codeaurora.org>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152036 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -38,9 +38,9 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
|
|||||||
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
|
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
|
||||||
"Enable NEON instructions",
|
"Enable NEON instructions",
|
||||||
[FeatureVFP3]>;
|
[FeatureVFP3]>;
|
||||||
def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
|
def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true",
|
||||||
"Enable NEON-VFP4 instructions",
|
"Enable Advanced SIMD2 instructions",
|
||||||
[FeatureVFP4, FeatureNEON]>;
|
[FeatureNEON]>;
|
||||||
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
|
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
|
||||||
"Enable Thumb2 instructions">;
|
"Enable Thumb2 instructions">;
|
||||||
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
|
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
|
||||||
@@ -76,6 +76,8 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
|
|||||||
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
|
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
|
||||||
"true",
|
"true",
|
||||||
"Use NEON for single precision FP">;
|
"Use NEON for single precision FP">;
|
||||||
|
// Allow more precision in FP computation
|
||||||
|
def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
|
||||||
|
|
||||||
// Disable 32-bit to 16-bit narrowing for experimentation.
|
// Disable 32-bit to 16-bit narrowing for experimentation.
|
||||||
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
|
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
|
||||||
|
@@ -732,10 +732,10 @@ void ARMAsmPrinter::emitAttributes() {
|
|||||||
if (Subtarget->hasNEON() && emitFPU) {
|
if (Subtarget->hasNEON() && emitFPU) {
|
||||||
/* NEON is not exactly a VFP architecture, but GAS emit one of
|
/* NEON is not exactly a VFP architecture, but GAS emit one of
|
||||||
* neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
|
* neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
|
||||||
if (Subtarget->hasNEONVFP4())
|
if (Subtarget->hasNEON2())
|
||||||
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
|
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
|
||||||
else
|
else
|
||||||
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
|
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
|
||||||
/* If emitted for NEON, omit from VFP below, since you can have both
|
/* If emitted for NEON, omit from VFP below, since you can have both
|
||||||
* NEON and VFP in build attributes but only one .fpu */
|
* NEON and VFP in build attributes but only one .fpu */
|
||||||
emitFPU = false;
|
emitFPU = false;
|
||||||
|
@@ -184,9 +184,9 @@ def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
|
|||||||
def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
|
def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
|
||||||
def HasNEON : Predicate<"Subtarget->hasNEON()">,
|
def HasNEON : Predicate<"Subtarget->hasNEON()">,
|
||||||
AssemblerPredicate<"FeatureNEON">;
|
AssemblerPredicate<"FeatureNEON">;
|
||||||
def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">,
|
def HasNEON2 : Predicate<"Subtarget->hasNEON2()">,
|
||||||
AssemblerPredicate<"FeatureNEONVFP4">;
|
AssemblerPredicate<"FeatureNEON2">;
|
||||||
def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">;
|
def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">;
|
||||||
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
|
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
|
||||||
AssemblerPredicate<"FeatureFP16">;
|
AssemblerPredicate<"FeatureFP16">;
|
||||||
def HasDivide : Predicate<"Subtarget->hasDivide()">,
|
def HasDivide : Predicate<"Subtarget->hasDivide()">,
|
||||||
|
@@ -4060,10 +4060,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
|
|||||||
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
|
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
|
||||||
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
|
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
|
||||||
v2f32, fmul_su, fadd_mlx>,
|
v2f32, fmul_su, fadd_mlx>,
|
||||||
Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
|
||||||
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
|
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
|
||||||
v4f32, fmul_su, fadd_mlx>,
|
v4f32, fmul_su, fadd_mlx>,
|
||||||
Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
|
||||||
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
|
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
|
||||||
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
|
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
|
||||||
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
|
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
|
||||||
@@ -4118,10 +4118,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
|
|||||||
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
|
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
|
||||||
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
|
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
|
||||||
v2f32, fmul_su, fsub_mlx>,
|
v2f32, fmul_su, fsub_mlx>,
|
||||||
Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
|
||||||
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
|
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
|
||||||
v4f32, fmul_su, fsub_mlx>,
|
v4f32, fmul_su, fsub_mlx>,
|
||||||
Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
|
||||||
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
|
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
|
||||||
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
|
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
|
||||||
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
|
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
|
||||||
@@ -4174,19 +4174,19 @@ defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
|
|||||||
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
|
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
|
||||||
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
|
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
|
||||||
v2f32, fmul_su, fadd_mlx>,
|
v2f32, fmul_su, fadd_mlx>,
|
||||||
Requires<[HasNEONVFP4]>;
|
Requires<[HasNEON2,FPContractions]>;
|
||||||
|
|
||||||
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
|
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
|
||||||
v4f32, fmul_su, fadd_mlx>,
|
v4f32, fmul_su, fadd_mlx>,
|
||||||
Requires<[HasNEONVFP4]>;
|
Requires<[HasNEON2,FPContractions]>;
|
||||||
|
|
||||||
// Fused Vector Multiply Subtract (floating-point)
|
// Fused Vector Multiply Subtract (floating-point)
|
||||||
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
|
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
|
||||||
v2f32, fmul_su, fsub_mlx>,
|
v2f32, fmul_su, fsub_mlx>,
|
||||||
Requires<[HasNEONVFP4]>;
|
Requires<[HasNEON2,FPContractions]>;
|
||||||
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
|
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
|
||||||
v4f32, fmul_su, fsub_mlx>,
|
v4f32, fmul_su, fsub_mlx>,
|
||||||
Requires<[HasNEONVFP4]>;
|
Requires<[HasNEON2,FPContractions]>;
|
||||||
|
|
||||||
// Vector Subtract Operations.
|
// Vector Subtract Operations.
|
||||||
|
|
||||||
@@ -5541,13 +5541,13 @@ def : N3VSPat<fadd, VADDfd>;
|
|||||||
def : N3VSPat<fsub, VSUBfd>;
|
def : N3VSPat<fsub, VSUBfd>;
|
||||||
def : N3VSPat<fmul, VMULfd>;
|
def : N3VSPat<fmul, VMULfd>;
|
||||||
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
|
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
|
||||||
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
|
||||||
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
|
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
|
||||||
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
|
Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
|
||||||
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
|
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
|
||||||
Requires<[HasNEONVFP4, UseNEONForFP]>;
|
Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
|
||||||
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
|
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
|
||||||
Requires<[HasNEONVFP4, UseNEONForFP]>;
|
Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
|
||||||
def : N2VSPat<fabs, VABSfd>;
|
def : N2VSPat<fabs, VABSfd>;
|
||||||
def : N2VSPat<fneg, VNEGfd>;
|
def : N2VSPat<fneg, VNEGfd>;
|
||||||
def : N3VSPat<NEONfmax, VMAXfd>;
|
def : N3VSPat<NEONfmax, VMAXfd>;
|
||||||
|
@@ -1030,7 +1030,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
|
|||||||
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
||||||
(f64 DPR:$Ddin)))]>,
|
(f64 DPR:$Ddin)))]>,
|
||||||
RegConstraint<"$Ddin = $Dd">,
|
RegConstraint<"$Ddin = $Dd">,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
|
|
||||||
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||||
@@ -1038,17 +1038,17 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
|||||||
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]> {
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// VFP pipelines.
|
// VFP pipelines.
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||||
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
||||||
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]>;
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
||||||
|
|
||||||
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
@@ -1056,7 +1056,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
|||||||
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
||||||
(f64 DPR:$Ddin)))]>,
|
(f64 DPR:$Ddin)))]>,
|
||||||
RegConstraint<"$Ddin = $Dd">,
|
RegConstraint<"$Ddin = $Dd">,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
|
|
||||||
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||||
@@ -1064,17 +1064,17 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
|||||||
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]> {
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// VFP pipelines.
|
// VFP pipelines.
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||||
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
||||||
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]>;
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
||||||
|
|
||||||
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
@@ -1082,7 +1082,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
|||||||
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
||||||
(f64 DPR:$Ddin)))]>,
|
(f64 DPR:$Ddin)))]>,
|
||||||
RegConstraint<"$Ddin = $Dd">,
|
RegConstraint<"$Ddin = $Dd">,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
|
|
||||||
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
|
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||||
@@ -1090,17 +1090,17 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
|
|||||||
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]> {
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// VFP pipelines.
|
// VFP pipelines.
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
||||||
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
||||||
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]>;
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
||||||
|
|
||||||
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
@@ -1108,24 +1108,24 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
|||||||
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
||||||
(f64 DPR:$Ddin)))]>,
|
(f64 DPR:$Ddin)))]>,
|
||||||
RegConstraint<"$Ddin = $Dd">,
|
RegConstraint<"$Ddin = $Dd">,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
|
|
||||||
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
|
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||||
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
|
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
|
||||||
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]> {
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// VFP pipelines.
|
// VFP pipelines.
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
||||||
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4,FPContractions]>;
|
||||||
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
||||||
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP]>;
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// FP Conditional moves.
|
// FP Conditional moves.
|
||||||
|
@@ -49,7 +49,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
|
|||||||
, HasVFPv3(false)
|
, HasVFPv3(false)
|
||||||
, HasVFPv4(false)
|
, HasVFPv4(false)
|
||||||
, HasNEON(false)
|
, HasNEON(false)
|
||||||
, HasNEONVFPv4(false)
|
, HasNEON2(false)
|
||||||
, UseNEONForSinglePrecisionFP(false)
|
, UseNEONForSinglePrecisionFP(false)
|
||||||
, SlowFPVMLx(false)
|
, SlowFPVMLx(false)
|
||||||
, HasVMLxForwarding(false)
|
, HasVMLxForwarding(false)
|
||||||
|
@@ -51,7 +51,7 @@ protected:
|
|||||||
bool HasVFPv3;
|
bool HasVFPv3;
|
||||||
bool HasVFPv4;
|
bool HasVFPv4;
|
||||||
bool HasNEON;
|
bool HasNEON;
|
||||||
bool HasNEONVFPv4;
|
bool HasNEON2;
|
||||||
|
|
||||||
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
|
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
|
||||||
/// specified. Use the method useNEONForSinglePrecisionFP() to
|
/// specified. Use the method useNEONForSinglePrecisionFP() to
|
||||||
@@ -205,7 +205,7 @@ protected:
|
|||||||
bool hasVFP3() const { return HasVFPv3; }
|
bool hasVFP3() const { return HasVFPv3; }
|
||||||
bool hasVFP4() const { return HasVFPv4; }
|
bool hasVFP4() const { return HasVFPv4; }
|
||||||
bool hasNEON() const { return HasNEON; }
|
bool hasNEON() const { return HasNEON; }
|
||||||
bool hasNEONVFP4() const { return HasNEONVFPv4; }
|
bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); }
|
||||||
bool useNEONForSinglePrecisionFP() const {
|
bool useNEONForSinglePrecisionFP() const {
|
||||||
return hasNEON() && UseNEONForSinglePrecisionFP; }
|
return hasNEON() && UseNEONForSinglePrecisionFP; }
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s
|
; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
|
||||||
; Check generated fused MAC and MLS.
|
; Check generated fused MAC and MLS.
|
||||||
|
|
||||||
define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline {
|
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
|
||||||
;CHECK: fusedMACTest1:
|
;CHECK: fusedMACTest1:
|
||||||
;CHECK: vfma.f64
|
;CHECK: vfma.f64
|
||||||
%1 = fmul double %d1, %d2
|
%1 = fmul double %d1, %d2
|
||||||
@@ -9,7 +9,7 @@ define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readno
|
|||||||
ret double %2
|
ret double %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline {
|
define float @fusedMACTest2(float %f1, float %f2, float %f3) {
|
||||||
;CHECK: fusedMACTest2:
|
;CHECK: fusedMACTest2:
|
||||||
;CHECK: vfma.f32
|
;CHECK: vfma.f32
|
||||||
%1 = fmul float %f1, %f2
|
%1 = fmul float %f1, %f2
|
||||||
@@ -17,7 +17,7 @@ define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone n
|
|||||||
ret float %2
|
ret float %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline {
|
define double @fusedMACTest3(double %d1, double %d2, double %d3) {
|
||||||
;CHECK: fusedMACTest3:
|
;CHECK: fusedMACTest3:
|
||||||
;CHECK: vfms.f64
|
;CHECK: vfms.f64
|
||||||
%1 = fmul double %d2, %d3
|
%1 = fmul double %d2, %d3
|
||||||
@@ -25,7 +25,7 @@ define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readno
|
|||||||
ret double %2
|
ret double %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline {
|
define float @fusedMACTest4(float %f1, float %f2, float %f3) {
|
||||||
;CHECK: fusedMACTest4:
|
;CHECK: fusedMACTest4:
|
||||||
;CHECK: vfms.f32
|
;CHECK: vfms.f32
|
||||||
%1 = fmul float %f2, %f3
|
%1 = fmul float %f2, %f3
|
||||||
@@ -33,7 +33,7 @@ define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone n
|
|||||||
ret float %2
|
ret float %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline {
|
define double @fusedMACTest5(double %d1, double %d2, double %d3) {
|
||||||
;CHECK: fusedMACTest5:
|
;CHECK: fusedMACTest5:
|
||||||
;CHECK: vfnma.f64
|
;CHECK: vfnma.f64
|
||||||
%1 = fmul double %d1, %d2
|
%1 = fmul double %d1, %d2
|
||||||
@@ -42,7 +42,7 @@ define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readno
|
|||||||
ret double %3
|
ret double %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
|
define float @fusedMACTest6(float %f1, float %f2, float %f3) {
|
||||||
;CHECK: fusedMACTest6:
|
;CHECK: fusedMACTest6:
|
||||||
;CHECK: vfnma.f32
|
;CHECK: vfnma.f32
|
||||||
%1 = fmul float %f1, %f2
|
%1 = fmul float %f1, %f2
|
||||||
@@ -51,7 +51,7 @@ define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
|
|||||||
ret float %3
|
ret float %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
|
define double @fusedMACTest7(double %d1, double %d2, double %d3) {
|
||||||
;CHECK: fusedMACTest7:
|
;CHECK: fusedMACTest7:
|
||||||
;CHECK: vfnms.f64
|
;CHECK: vfnms.f64
|
||||||
%1 = fmul double %d1, %d2
|
%1 = fmul double %d1, %d2
|
||||||
@@ -59,10 +59,42 @@ define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
|
|||||||
ret double %2
|
ret double %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind {
|
define float @fusedMACTest8(float %f1, float %f2, float %f3) {
|
||||||
;CHECK: fusedMACTest8:
|
;CHECK: fusedMACTest8:
|
||||||
;CHECK: vfnms.f32
|
;CHECK: vfnms.f32
|
||||||
%1 = fmul float %f1, %f2
|
%1 = fmul float %f1, %f2
|
||||||
%2 = fsub float %1, %f3
|
%2 = fsub float %1, %f3
|
||||||
ret float %2
|
ret float %2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
|
||||||
|
;CHECK: fusedMACTest9:
|
||||||
|
;CHECK: vfma.f32
|
||||||
|
%mul = fmul <2 x float> %a, %b
|
||||||
|
%add = fadd <2 x float> %mul, %a
|
||||||
|
ret <2 x float> %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
|
||||||
|
;CHECK: fusedMACTest10:
|
||||||
|
;CHECK: vfms.f32
|
||||||
|
%mul = fmul <2 x float> %a, %b
|
||||||
|
%sub = fsub <2 x float> %a, %mul
|
||||||
|
ret <2 x float> %sub
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
|
||||||
|
;CHECK: fusedMACTest11:
|
||||||
|
;CHECK: vfma.f32
|
||||||
|
%mul = fmul <4 x float> %a, %b
|
||||||
|
%add = fadd <4 x float> %mul, %a
|
||||||
|
ret <4 x float> %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
|
||||||
|
;CHECK: fusedMACTest12:
|
||||||
|
;CHECK: vfms.f32
|
||||||
|
%mul = fmul <4 x float> %a, %b
|
||||||
|
%sub = fsub <4 x float> %a, %mul
|
||||||
|
ret <4 x float> %sub
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user