Rename VBROADCASTSDrm into VBROADCASTSDYrm to match the naming convention.

Allow the folding of vbroadcastRR to vbroadcastRM, where the memory operand
is a spill slot. PR12782.
Together with Michael Kuperstein <michael.m.kuperstein@intel.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160230 91177308-0d34-0410-b5e6-96231b3b80d8
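For context on the mechanism being extended: X86InstrInfo keeps register-to-memory opcode tables, and when the source register of an instruction has been spilled, the register allocator can ask those tables for an equivalent opcode that reads the operand straight from the stack slot instead of reloading it into a register first. The Y suffix in the renamed instruction marks the 256-bit YMM form, matching existing names such as VBROADCASTSSYrm. The minimal C++ sketch below illustrates only the lookup idea; FoldEntry, FoldTbl, and lookupFoldedLoadOpcode are hypothetical names, not LLVM's actual API (the real data lives in tables like OpTbl1 in the diff below).

    #include <cstdint>
    #include <optional>

    // Illustrative stand-ins for the X86 opcode enumerators and table flags.
    enum : uint16_t { VBROADCASTSSrr = 1, VBROADCASTSSrm = 2, TB_NO_REVERSE = 0x10 };

    struct FoldEntry {
      uint16_t RegOp;  // register form, e.g. X86::VBROADCASTSSrr
      uint16_t MemOp;  // load-folding form, e.g. X86::VBROADCASTSSrm
      uint16_t Flags;  // alignment constraints, TB_NO_REVERSE, ...
    };

    // A one-operand fold table in the spirit of OpTbl1 below.
    static const FoldEntry FoldTbl[] = {
      { VBROADCASTSSrr, VBROADCASTSSrm, TB_NO_REVERSE },
    };

    // If the broadcast's source register was spilled, return an opcode that
    // folds the reload, i.e. broadcasts straight from the spill slot.
    std::optional<uint16_t> lookupFoldedLoadOpcode(uint16_t RegOp) {
      for (const FoldEntry &E : FoldTbl)
        if (E.RegOp == RegOp)
          return E.MemOp;
      return std::nullopt;  // not foldable; an explicit reload is emitted instead
    }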
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -539,6 +539,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VSQRTPSr_Int,    X86::VSQRTPSm_Int,        TB_ALIGN_16 },
     { X86::VUCOMISDrr,      X86::VUCOMISDrm,          0 },
     { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 },
+    { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },
+
     // AVX 256-bit foldable instructions
     { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
     { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
@@ -547,6 +549,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 },
     { X86::VPERMILPDYri,    X86::VPERMILPDYmi,        TB_ALIGN_32 },
     { X86::VPERMILPSYri,    X86::VPERMILPSYmi,        TB_ALIGN_32 },
+
     // AVX2 foldable instructions
     { X86::VPABSBrr256,     X86::VPABSBrm256,         TB_ALIGN_32 },
     { X86::VPABSDrr256,     X86::VPABSDrm256,         TB_ALIGN_32 },
@@ -562,6 +565,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VSQRTPDYr_Int,   X86::VSQRTPDYm_Int,       TB_ALIGN_32 },
     { X86::VSQRTPSYr,       X86::VSQRTPSYm,           TB_ALIGN_32 },
     { X86::VSQRTPSYr_Int,   X86::VSQRTPSYm_Int,       TB_ALIGN_32 },
+    { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm,     TB_NO_REVERSE },
+    { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm,     TB_NO_REVERSE },
   };

   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
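The loop at the end of this hunk registers every OpTbl1 entry in two directions: a fold map (register form to memory form) and, unless TB_NO_REVERSE is set, an unfold map back. The new broadcast entries need TB_NO_REVERSE because the memory form loads only a scalar and broadcasts it, while the register form reads a whole XMM register, so unfolding would widen the load and touch bytes the original code never read. A hedged sketch of that registration step, with std::unordered_map standing in for LLVM's DenseMap and approximate names:

    #include <unordered_map>
    #include <utility>

    using FoldTable = std::unordered_map<unsigned, std::pair<unsigned, unsigned>>;

    // Approximation of the table-registration helper invoked by the loop above;
    // the names and container choice are illustrative, not LLVM's exact code.
    void addTableEntry(FoldTable &RegOp2MemOp, FoldTable &MemOp2RegOp,
                       unsigned RegOp, unsigned MemOp, unsigned Flags,
                       unsigned NoReverseFlag) {
      // Forward direction: fold a spill-slot reload into the instruction.
      RegOp2MemOp[RegOp] = {MemOp, Flags};
      // Reverse direction (unfolding) only when both forms are equivalent.
      if ((Flags & NoReverseFlag) == 0)
        MemOp2RegOp[MemOp] = {RegOp, Flags};
    }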
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7274,7 +7274,7 @@ let ExeDomain = SSEPackedSingle in {
                                     int_x86_avx_vbroadcast_ss_256>;
 }
 let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
                                     int_x86_avx_vbroadcast_sd_256>;
 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                    int_x86_avx_vbroadcastf128_pd_256>;
@@ -7753,11 +7753,11 @@ let Predicates = [HasAVX] in {
   def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
             (VBROADCASTSSYrm addr:$src)>;
   def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
-            (VBROADCASTSDrm addr:$src)>;
+            (VBROADCASTSDYrm addr:$src)>;
   def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
             (VBROADCASTSSYrm addr:$src)>;
   def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
-            (VBROADCASTSDrm addr:$src)>;
+            (VBROADCASTSDYrm addr:$src)>;
   def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
             (VBROADCASTSSrm addr:$src)>;
   def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
+
+declare x86_fastcallcc i64 @barrier()
+
+;CHECK: bcast_fold
+;CHECK: vbroadcastss -24(%ebp), %ymm0 # 16-byte Folded Reload
+;CHECK: ret
+define <8 x float> @bcast_fold( float* %A) {
+BB:
+  %A0 = load float* %A
+  %tt3 = call x86_fastcallcc i64 @barrier()
+  br i1 undef, label %work, label %exit
+
+work:
+  %A1 = insertelement <8 x float> undef, float %A0, i32 0
+  %A2 = shufflevector <8 x float> %A1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %A2
+
+exit:
+  ret <8 x float> undef
+}
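The new test pins down the intended behavior: the value loaded from %A is live across the call to @barrier, whose calling convention clobbers the vector registers, so the value must be spilled; the CHECK line then requires the reload after the call to be folded into vbroadcastss from the frame slot rather than performed as a separate vector move. To run it by hand (assuming a built llc and FileCheck on PATH):

    llc < test/CodeGen/X86/2012-07-15-broadcastfold.ll -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck test/CodeGen/X86/2012-07-15-broadcastfold.ll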