[AArch64] Simplify the Neon Scalar3Same patterns for floating-point reciprocal

step, floating-point reciprocal square root step, floating-point absolute
difference, and integer/floating-point compare instructions.  Also, move the
scalar general arithmetic operation patterns closer to similar code.  No
functional change intended.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197250 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2013-12-13 17:56:44 +00:00
parent 48c2927021
commit 513a00db78

View File

@ -4155,19 +4155,12 @@ multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
: Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
(INSTB FPR8:$Rn, FPR8:$Rm)>;
def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
(INSTH FPR16:$Rn, FPR16:$Rm)>;
def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
(INSTS FPR32:$Rn, FPR32:$Rm)>;
}
class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
Instruction INSTD>
: Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
Instruction INSTH,
Instruction INSTS> {
@ -4177,33 +4170,13 @@ multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
(INSTS FPR32:$Rn, FPR32:$Rm)>;
}
multiclass Neon_Scalar3Same_fabd_SD_size_patterns<SDPatternOperator opnode,
Instruction INSTS,
Instruction INSTD> {
def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
(INSTS FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
}
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
SDPatternOperator opnodeV,
Instruction INSTS,
Instruction INSTD> {
def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
ValueType SResTy, ValueType STy,
Instruction INSTS, ValueType DResTy,
ValueType DTy, Instruction INSTD> {
def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
(INSTS FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (opnodeV (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
}
multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
Instruction INSTS,
Instruction INSTD> {
def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
(INSTS FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
}
@ -4875,15 +4848,17 @@ defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
// Scalar Floating-point Reciprocal Step
defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps,
int_arm_neon_vrecps, FRECPSsss,
FRECPSddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
FRECPSsss, f64, f64, FRECPSddd>;
def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
// Scalar Floating-point Reciprocal Square Root Step
defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts,
int_arm_neon_vrsqrts, FRSQRTSsss,
FRSQRTSddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
FRSQRTSsss, f64, f64, FRSQRTSddd>;
def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
// Patterns to match llvm.aarch64.* intrinsic for
@ -5092,7 +5067,7 @@ def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
// Scalar Compare Bitwise Equal
def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
Instruction INSTD,
@ -5104,28 +5079,28 @@ def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
// Scalar Compare Signed Greather Than Or Equal
def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
// Scalar Compare Unsigned Higher Or Same
def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
// Scalar Compare Unsigned Higher
def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
// Scalar Compare Signed Greater Than
def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
// Scalar Compare Bitwise Test Bits
def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
// Scalar Compare Bitwise Equal To Zero
def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
@ -5161,8 +5136,8 @@ def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
// Scalar Floating-point Compare Mask Equal
defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fceq,
FCMEQsss, FCMEQddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
FCMEQsss, v1i64, f64, FCMEQddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
// Scalar Floating-point Compare Mask Equal To Zero
@ -5174,8 +5149,8 @@ def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)),
// Scalar Floating-point Compare Mask Greater Than Or Equal
defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcge,
FCMGEsss, FCMGEddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
FCMGEsss, v1i64, f64, FCMGEddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
@ -5185,8 +5160,8 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge,
// Scalar Floating-point Compare Mask Greather Than
defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcgt,
FCMGTsss, FCMGTddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
FCMGTsss, v1i64, f64, FCMGTddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
// Scalar Floating-point Compare Mask Greather Than Zero
@ -5206,22 +5181,22 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz,
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcage,
FACGEsss, FACGEddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
FACGEsss, v1i64, f64, FACGEddd>;
def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FACGEddd FPR64:$Rn, FPR64:$Rm)>;
// Scalar Floating-point Absolute Compare Mask Greater Than
defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcagt,
FACGTsss, FACGTddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
FACGTsss, v1i64, f64, FACGTddd>;
def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FACGTddd FPR64:$Rn, FPR64:$Rm)>;
// Scakar Floating-point Absolute Difference
// Scalar Floating-point Absolute Difference
defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
defm : Neon_Scalar3Same_fabd_SD_size_patterns<int_aarch64_neon_vabd,
FABDsss, FABDddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
FABDsss, f64, f64, FABDddd>;
// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
@ -5481,7 +5456,6 @@ defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
v1f64, v2f64, neon_uimm0_bare>;
// Scalar Floating Point fused multiply-add (scalar, by element)
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
@ -5766,38 +5740,6 @@ defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
i32, VPR128Lo, neon_uimm2_bare>;
// Scalar general arithmetic operation
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(INST FPR64:$Rn, FPR64:$Rm)>;
class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
(v1f64 FPR64:$Ra))),
(INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
@ -5884,6 +5826,38 @@ defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
VPR128Lo, neon_uimm2_bare>;
// Scalar general arithmetic operation
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(INST FPR64:$Rn, FPR64:$Rm)>;
class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
Instruction INST>
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
(v1f64 FPR64:$Ra))),
(INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
// Scalar Copy - DUP element to scalar
class NeonI_Scalar_DUP<string asmop, string asmlane,
RegisterClass ResRC, RegisterOperand VPRC,