Optimized load + SIGN_EXTEND patterns in the X86 backend.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170506 91177308-0d34-0410-b5e6-96231b3b80d8
commit 4b977312c7
parent bf5a2c6a39
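In plain terms: a sign-extending vector load, which previously lowered to scalar sign-extended loads (movsbq and friends) or an SSSE3 shuffle sequence, can now be selected as a single SSE4.1/AVX pmovsx instruction straight from memory. A minimal illustration in this tree's IR dialect (function name hypothetical; expected output taken from the CHECK lines of the tests added at the end of this diff):

; With SSE4.1 this is expected to lower to pmovsxwd (%rdi), %xmm0;
; with AVX, to vpmovsxwd.
define <4 x i32> @sext_load_example(<4 x i16>* %ptr) {
 %X = load <4 x i16>* %ptr                ; 64 bits in memory
 %Y = sext <4 x i16> %X to <4 x i32>      ; widen each lane from 16 to 32 bits
 ret <4 x i32> %Y
}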
@@ -5235,6 +5235,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                      LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    AddToWorkList(ExtLoad.getNode());
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -15929,10 +15929,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,

   // If this is a vector EXT Load then attempt to optimize it using a
   // shuffle. We need SSSE3 shuffles.
+  // SEXT loads are supported starting SSE41.
+  // We generate X86ISD::VSEXT for them.
   // TODO: It is possible to support ZExt by zeroing the undef values
   // during the shuffle phase or after the shuffle.
   if (RegVT.isVector() && RegVT.isInteger() &&
-      Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
+      (Ext == ISD::EXTLOAD && Subtarget->hasSSSE3() ||
+       Ext == ISD::SEXTLOAD && Subtarget->hasSSE41())){
     assert(MemVT != RegVT && "Cannot extend to the same type");
     assert(MemVT.isVector() && "Must load a vector from memory");

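The gate above now admits two cases: any-extending loads (ISD::EXTLOAD) keep the existing SSSE3 shuffle lowering, while sign-extending loads (ISD::SEXTLOAD) newly take an X86ISD::VSEXT path when SSE4.1 is available. Note the added clause parses as intended only because && binds tighter than ||. A sketch of IR that reaches the new branch, assuming the usual type legalization (function name hypothetical; cf. load_sext_test6 below):

; <8 x i8> is not a legal load type on x86-64, so the load is widened and
; the sext is folded into it, producing a v8i8 -> v8i16 SEXTLOAD.
define <8 x i16> @sext_branch_example(<8 x i8>* %ptr) {
 %X = load <8 x i8>* %ptr
 %Y = sext <8 x i8> %X to <8 x i16>       ; expected: pmovsxbw (%rdi), %xmm0
 ret <8 x i16> %Y
}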
@@ -15941,6 +15944,9 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     unsigned MemSz = MemVT.getSizeInBits();
     assert(RegSz > MemSz && "Register size must be greater than the mem size");

+    if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+      return SDValue();
+
     // All sizes must be a power of two.
     if (!isPowerOf2_32(RegSz * MemSz * NumElems))
       return SDValue();
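The new early return handles 256-bit results: sign-extending into a 256-bit integer vector needs the AVX2 ymm forms of vpmovsx (hasInt256); without them the combine backs off and default lowering takes over. A sketch of an affected case (function name hypothetical):

; On AVX1 (no Int256) the combine now bails out here; on AVX2 the same IR
; is expected to select vpmovsxdq (%rdi), %ymm0 (cf. the 256-bit
; load_sext_test1 added below).
define <4 x i64> @sext_256bit_example(<4 x i32>* %ptr) {
 %X = load <4 x i32>* %ptr
 %Y = sext <4 x i32> %X to <4 x i64>
 ret <4 x i64> %Y
}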
@@ -15964,16 +15970,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     // Calculate the number of scalar loads that we need to perform
     // in order to load our vector from memory.
     unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+    if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+      return SDValue();
+
+    unsigned loadRegZize = RegSz;
+    if (Ext == ISD::SEXTLOAD && RegSz == 256)
+      loadRegZize /= 2;

     // Represent our vector as a sequence of elements which are the
     // largest scalar that we can load.
     EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
-      RegSz/SclrLoadTy.getSizeInBits());
+      loadRegZize/SclrLoadTy.getSizeInBits());

     // Represent the data using the same element type that is stored in
     // memory. In practice, we ''widen'' MemVT.
-    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
-                                    RegSz/MemVT.getScalarType().getSizeInBits());
+    EVT WideVecVT =
+        EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+                         loadRegZize/MemVT.getScalarType().getSizeInBits());

     assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
            "Invalid vector type");
@@ -16014,6 +16027,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
     unsigned SizeRatio = RegSz/MemSz;

+    if (Ext == ISD::SEXTLOAD) {
+      SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+      return DCI.CombineTo(N, Sext, TF, true);
+    }
     // Redistribute the loaded elements into the different locations.
     SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
     for (unsigned i = 0; i != NumElems; ++i)
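Tracing the whole PerformLOADCombine path for a v8i8 -> v8i32 sign-extending load on AVX2 (values worked out by hand from the code above, so treat them as illustrative): RegSz = 256 and MemSz = 64, so loadRegZize is halved to 128; SclrLoadTy is i64, giving NumLoads = 64/64 = 1, which satisfies the new single-load requirement; LoadUnitVecVT = v2i64 and WideVecVT = v16i8, both 128 bits wide as the assert demands. One i64 load is built, bitcast to v16i8, and the new branch above wraps it in X86ISD::VSEXT to yield the v8i32 result:

; The traced case (cf. the 256-bit load_sext_test5 below).
define <8 x i32> @sext_v8i8_v8i32(<8 x i8>* %ptr) {
 %X = load <8 x i8>* %ptr                 ; one 64-bit scalar load
 %Y = sext <8 x i8> %X to <8 x i32>       ; expected: vpmovsxbd (%rdi), %ymm0
 ret <8 x i32> %Y
}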
@@ -5842,6 +5842,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
 defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
 defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;

+let Predicates = [HasAVX2] in {
+  def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+            (VPMOVSXWDYrm addr:$src)>;
+  def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+            (VPMOVSXDQYrm addr:$src)>;
+
+  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXBDYrm addr:$src)>;
+  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXBDYrm addr:$src)>;
+
+  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXWQYrm addr:$src)>;
+  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXWQYrm addr:$src)>;
+
+  def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+                     (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVSXBQYrm addr:$src)>;
+}
+
 let Predicates = [HasAVX] in {
   // Common patterns involving scalar load
   def : Pat<(int_x86_sse41_pmovsxbq
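These AVX2 patterns match exactly the DAG shape the combine above produces: a scalar load wrapped in scalar_to_vector, bitconvert'ed to the narrow-element vector that X86vsext consumes. For instance, the VPMOVSXBQYrm pattern should fire for IR like the following (function name hypothetical; cf. the 256-bit load_sext_test2 below):

; A 32-bit scalar load viewed as v16i8, sign-extended to four i64 lanes.
define <4 x i64> @sext_v4i8_v4i64(<4 x i8>* %ptr) {
 %X = load <4 x i8>* %ptr
 %Y = sext <4 x i8> %X to <4 x i64>       ; expected: vpmovsxbq (%rdi), %ymm0
 ret <4 x i64> %Y
}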
@@ -5866,6 +5891,34 @@ let Predicates = [UseSSE41] in {
                                       (bitconvert (v4i32 (X86vzmovl
                                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
             (PMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+                     (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVSXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+                     (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVSXWQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+                     (scalar_to_vector (extloadi32i16 addr:$src))))))),
+            (PMOVSXBQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXDQrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXBWrm addr:$src)>;
 }

 let Predicates = [HasAVX2] in {
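The SSE4.1 set mirrors the AVX forms for xmm results. One subtlety: a v2i8 source occupies only 16 bits of memory, so the PMOVSXBQrm pattern matches through extloadi32i16 (a 16-bit load any-extended to i32) rather than a full 32-bit load, keeping the instruction from reading past the object. A sketch of the corresponding IR (function name hypothetical; cf. load_sext_test3 and the pmovsxbq CHECK lines in the first test file below):

; Two bytes in memory, sign-extended to two 64-bit lanes.
define <2 x i64> @sext_v2i8_v2i64(<2 x i8>* %ptr) {
 %X = load <2 x i8>* %ptr                 ; 16-bit memory operand
 %Y = sext <2 x i8> %X to <2 x i64>       ; expected: pmovsxbq (%rdi), %xmm0
 ret <2 x i64> %Y
}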
@@ -5926,6 +5979,35 @@ let Predicates = [HasAVX] in {
             (VPMOVZXDQrm addr:$src)>;
   def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
             (VPMOVZXDQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXDQrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXDQrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+                     (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+                     (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXBWrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+                     (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVSXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+                     (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVSXWQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+                     (scalar_to_vector (extloadi32i16 addr:$src))))))),
+            (VPMOVSXBQrm addr:$src)>;
 }

 let Predicates = [UseSSE41] in {
@@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: main
 define i32 @main() nounwind uwtable {
 entry:
-; CHECK: movsbq j(%rip), %
-; CHECK: movsbq i(%rip), %
+; CHECK: pmovsxbq j(%rip), %
+; CHECK: pmovsxbq i(%rip), %
   %0 = load <2 x i8>* @i, align 8
   %1 = load <2 x i8>* @j, align 8
   %div = sdiv <2 x i8> %1, %0
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s

 define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
 ;CHECK: sext_8i16_to_8i32
@@ -15,3 +15,57 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
   %B = sext <4 x i32> %A to <4 x i64>
   ret <4 x i64>%B
 }
+
+; CHECK: load_sext_test1
+; CHECK: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
+; CHECK: load_sext_test2
+; CHECK: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
+; CHECK: load_sext_test3
+; CHECK: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
+ %X = load <2 x i8>* %ptr
+ %Y = sext <2 x i8> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; CHECK: load_sext_test4
+; CHECK: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
+ %X = load <2 x i16>* %ptr
+ %Y = sext <2 x i16> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; CHECK: load_sext_test5
+; CHECK: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
+ %X = load <2 x i32>* %ptr
+ %Y = sext <2 x i32> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; CHECK: load_sext_test6
+; CHECK: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK: ret
+define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i16>
+ ret <8 x i16>%Y
+}
@@ -63,6 +63,47 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
   ret <8 x i32>%B
 }
+
+; CHECK: load_sext_test1
+; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
+ %X = load <4 x i32>* %ptr
+ %Y = sext <4 x i32> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; CHECK: load_sext_test2
+; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; CHECK: load_sext_test3
+; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; CHECK: load_sext_test4
+; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
+ %X = load <8 x i16>* %ptr
+ %Y = sext <8 x i16> %X to <8 x i32>
+ ret <8 x i32>%Y
+}
+
+; CHECK: load_sext_test5
+; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i32>
+ ret <8 x i32>%Y
+}