mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-01 15:17:25 +00:00
combine consecutive subvector 16-byte loads into one 32-byte load
This is a fix for PR21709 ( http://llvm.org/bugs/show_bug.cgi?id=21709 ). When two consecutive 16-byte loads are merged into one 32-byte vector, we can use a single 32-byte load instead. We don't do this for SandyBridge / IvyBridge because they have slower 32-byte memops. We also don't bother using 32-byte *integer* loads on a machine that only has AVX1 (btver2), because those operands would have to be split in half anyway: there is no support for 32-byte integer math ops on such machines.
Differential Revision: http://reviews.llvm.org/D6492
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224344 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -773,6 +773,7 @@ def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||
// Holds when Subtarget->IsLegalToCallImmediateAddr(TM) is true.
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||
// Holds when Subtarget->callRegIndirect() is false.
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
|
||||
// Holds when Subtarget->slowIncDec() is false.
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
|
||||
// Holds when Subtarget->isUnalignedMem32Slow() is false. Used to gate the
// patterns that fold two consecutive 16-byte loads into one 32-byte load.
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
|
||||
@@ -8158,6 +8158,49 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
}
|
||||
|
||||
// Combine two consecutive 16-byte loads with a common destination register into
|
||||
// one 32-byte load to that register.
//
// Every pattern below has the same shape:
//   - the low half is a 16-byte load from addr:$src, inserted at index 0 into
//     an undef 256-bit vector;
//   - the high half is a 16-byte load from (add addr:$src, 16), inserted at
//     the element index where the upper 128 bits begin for that vector type;
//   - the result is a single 256-bit load instruction from addr:$src.
// The patterns only apply when both HasAVX and HasFastMem32 hold.
// NOTE(review): the integer patterns use the same predicate list as the
// floating-point ones; confirm that is intended for AVX1-only subtargets.
|
||||
let Predicates = [HasAVX, HasFastMem32] in {
|
||||
// v8f32: upper half inserted at element 4; selects VMOVUPSYrm.
def : Pat<(insert_subvector
|
||||
(v8f32 (insert_subvector undef, (loadv4f32 addr:$src), (iPTR 0))),
|
||||
(loadv4f32 (add addr:$src, (iPTR 16))),
|
||||
(iPTR 4)),
|
||||
(VMOVUPSYrm addr:$src)>;
|
||||
|
||||
// v4f64: upper half inserted at element 2; selects VMOVUPDYrm.
def : Pat<(insert_subvector
|
||||
(v4f64 (insert_subvector undef, (loadv2f64 addr:$src), (iPTR 0))),
|
||||
(loadv2f64 (add addr:$src, (iPTR 16))),
|
||||
(iPTR 2)),
|
||||
(VMOVUPDYrm addr:$src)>;
|
||||
|
||||
// v32i8: both halves are loadv2i64 viewed through bc_v16i8; upper half
// inserted at element 16; selects VMOVDQUYrm.
def : Pat<(insert_subvector
|
||||
(v32i8 (insert_subvector
|
||||
undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
(bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
|
||||
(iPTR 16)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
|
||||
// v16i16: both halves are loadv2i64 viewed through bc_v8i16; upper half
// inserted at element 8; selects VMOVDQUYrm.
def : Pat<(insert_subvector
|
||||
(v16i16 (insert_subvector
|
||||
undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
(bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
|
||||
(iPTR 8)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
|
||||
// v8i32: both halves are loadv2i64 viewed through bc_v4i32; upper half
// inserted at element 4; selects VMOVDQUYrm.
def : Pat<(insert_subvector
|
||||
(v8i32 (insert_subvector
|
||||
undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
(bc_v4i32 (loadv2i64 (add addr:$src, (iPTR 16)))),
|
||||
(iPTR 4)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
|
||||
// v4i64: halves are plain loadv2i64 (no bitcast); upper half inserted at
// element 2; selects VMOVDQUYrm.
def : Pat<(insert_subvector
|
||||
(v4i64 (insert_subvector undef, (loadv2i64 addr:$src), (iPTR 0))),
|
||||
(loadv2i64 (add addr:$src, (iPTR 16))),
|
||||
(iPTR 2)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(iPTR imm)),
|
||||
|
||||
Reference in New Issue
Block a user