From 5eda282cd1775afc2ec1e1b86c9e224d2db10302 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 16 Feb 2011 00:35:02 +0000 Subject: [PATCH] Some single precision VFP instructions may be executed on NEON pipeline, but not double precision ones. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125624 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 8 +- lib/Target/ARM/ARMInstrVFP.td | 180 ++++++++++++++++++++++++++---- 2 files changed, 160 insertions(+), 28 deletions(-) diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index b602712a23e..24587cb6f69 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1200,7 +1200,7 @@ class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{11-9} = 0b101; let Inst{8} = 1; // Double precision - // 64-bit loads & stores operate on both NEON and VFP pipelines. + // Loads & stores operate on both NEON and VFP pipelines. let D = VFPNeonDomain; } @@ -1225,6 +1225,9 @@ class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; } // VFP Load / store multiple pseudo instructions. @@ -1257,9 +1260,6 @@ class AXDI4; + [(set SPR:$Sd, (load addrmode5:$addr))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' @@ -68,7 +72,11 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", ".32\t$Sd, $addr", - [(store SPR:$Sd, addrmode5:$addr)]>; + [(store SPR:$Sd, addrmode5:$addr)]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} //===----------------------------------------------------------------------===// // Load / store multiple Instructions. @@ -118,6 +126,10 @@ multiclass vfp_ldst_mult; + [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -182,7 +210,11 @@ def VSUBD : ADbI<0b11100, 0b11, 1, 0, def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>; + [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -202,7 +234,11 @@ def VMULD : ADbI<0b11100, 0b10, 0, 0, def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>; + [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -212,7 +248,11 @@ def VNMULD : ADbI<0b11100, 0b10, 1, 0, def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>; + [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), @@ -230,7 +270,11 @@ def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm)]>; + [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} // FIXME: Verify encoding after integrated assembler is working. def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, @@ -241,8 +285,12 @@ def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } +} // Defs = [FPSCR] //===----------------------------------------------------------------------===// // FP Unary Operations. @@ -256,7 +304,11 @@ def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", - [(set SPR:$Sd, (fabs SPR:$Sm))]>; + [(set SPR:$Sd, (fabs SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} let Defs = [FPSCR] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, @@ -273,6 +325,10 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } // FIXME: Verify encoding after integrated assembler is working. @@ -290,8 +346,12 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } -} +} // Defs = [FPSCR] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), @@ -362,7 +422,11 @@ def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", - [(set SPR:$Sd, (fneg SPR:$Sm))]>; + [(set SPR:$Sd, (fneg SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -533,6 +597,10 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { let Inst{7} = 1; // s32 + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -547,6 +615,10 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { let Inst{7} = 0; // u32 + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } // FP -> Int: @@ -598,6 +670,10 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { let Inst{7} = 1; // Z bit + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, @@ -612,6 +688,10 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { let Inst{7} = 1; // Z bit + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. @@ -662,22 +742,38 @@ let Constraints = "$a = $dst", isCodeGenOnly = 1 in { def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), @@ -704,22 +800,38 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), @@ -761,7 +873,11 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -784,7 +900,11 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -807,7 +927,11 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -829,7 +953,11 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -866,7 +994,11 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd">; + RegConstraint<"$Sn = $Sd"> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} } // neverHasSideEffects //===----------------------------------------------------------------------===//