Use VLD1 in NEON extenting-load patterns instead of VLDR.

On some cores it's a bad idea for performance to mix VFP and NEON instructions
and since these patterns are NEON anyway, the NEON load should be used.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155630 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2012-04-26 08:46:29 +00:00
parent e38993f892
commit 37abe8df4a
2 changed files with 65 additions and 58 deletions

View File

@ -5595,47 +5595,51 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
// Lengthen_Single<"8", "i16", "i8"> =
// Pat<(v8i16 (extloadvi8 addrmode6oneL32:$addr))
// (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(VLDRD addrmode5:$addr))>;
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(VLDRD addrmode5:$addr))>;
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
(VLDRD addrmode5:$addr))>;
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))>;
}
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
// (VLDRS addrmode5:$addr),
// ssub_0)),
// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
}
@ -5645,32 +5649,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
// (VLDRS addrmode5:$addr),
// ssub_0)),
// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)),
// (i32 0))),
// dsub_0)),
// qsub_0)>;
// dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0))>;
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0))>;
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0))>;
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0))>;
}
// extload, zextload and sextload for a lengthening load followed by another
@ -5678,36 +5682,35 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
// (VLDRS addrmode5:$addr),
// ssub_0)),
// dsub_0)),
// dsub_0)>;
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)),
// dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
dsub_0)>;
}
@ -5727,18 +5730,18 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
def : Pat<(v2i64 (extloadvi8 addrmode6oneL32:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6oneL32:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6oneL32:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
dsub_0)), dsub_0))>;
(VLD1LNd32 addrmode6oneL32:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
//===----------------------------------------------------------------------===//
// Assembler aliases

View File

@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) {
; CHECK: g:
define float @g(<4 x i8>* nocapture %in) {
; CHECK: vldr
; Note: vld1 here is reasonably important. Mixing VFP and NEON
; instructions is bad on some cores
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
%1 = load <4 x i8>* %in
@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) {
; CHECK: i:
define <4 x i8> @i(<4 x i8>* %x) {
; CHECK: vldr
; Note: vld1 here is reasonably important. Mixing VFP and NEON
; instructions is bad on some cores
; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe