mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-07-22 09:29:31 +00:00
Optimization for the following SIGN_EXTEND pairs:
v8i8 -> v8i64, v8i8 -> v8i32, v4i8 -> v4i64, v4i16 -> v4i64 for AVX and AVX2. Bug 14865. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172708 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c2cbcc3acf
commit
6c327f92a5
@@ -4298,11 +4298,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   if (isa<ConstantSDNode>(N0))
     return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
 
-  // fold (sext (sext x)) -> (sext x)
-  // fold (sext (aext x)) -> (sext x)
-  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
-    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
-                       N0.getOperand(0));
+  // Folding (sext (sext x)) is obvious, but we do it only after the type
+  // legalization phase. When the sequence is like {(T1->T2), (T2->T3)} and
+  // T1 or T3 (or the both) are illegal types, the TypeLegalizer may not
+  // give a good sequence for the (T1->T3) pair.
+  // So we give a chance to target specific combiner to optimize T1->T2 and T2->T3
+  // separately and may be fold them in a preceding of subsequent instruction.
+  if (Level >= AfterLegalizeTypes) {
+    // fold (sext (sext x)) -> (sext x)
+    // fold (sext (aext x)) -> (sext x)
+    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+  }
 
   if (N0.getOpcode() == ISD::TRUNCATE) {
     // fold (sext (truncate (load x))) -> (sext (smaller load x))
@@ -2554,9 +2554,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
             VT.getVectorNumElements() ==
             Operand.getValueType().getVectorNumElements()) &&
            "Vector element count mismatch!");
-    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
-      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
-    else if (OpOpcode == ISD::UNDEF)
+    if (OpOpcode == ISD::UNDEF)
       // sext(undef) = 0, because the top bits will all be the same.
       return getConstant(0, VT);
     break;
@@ -16970,14 +16970,37 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+
+  if (!VT.isVector())
+    return SDValue();
+
+  SDValue In = N->getOperand(0);
+  EVT InVT = In.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned ExtenedEltSize = VT.getVectorElementType().getSizeInBits();
+
+  // Split SIGN_EXTEND operation to use vmovsx instruction when possible
+  if (InVT == MVT::v8i8) {
+    if (ExtenedEltSize > 16 && !Subtarget->hasInt256())
+      In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In);
+    if (ExtenedEltSize > 32)
+      In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In);
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
+  }
+
+  if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) &&
+      ExtenedEltSize > 32 && !Subtarget->hasInt256()) {
+    In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
+  }
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
 
   if (!Subtarget->hasFp256())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  if (VT.isVector() && VT.getSizeInBits() == 256) {
+  if (VT.is256BitVector()) {
     SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
     if (R.getNode())
       return R;
@@ -142,3 +142,71 @@ define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16>%Y
 }
+; AVX: sext_1
+; AVX: vpmovsxbd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define void @sext_1(<4 x i8>* %inbuf, <4 x i64>* %outbuf) {
+  %v0 = load <4 x i8>* %inbuf
+  %r = sext <4 x i8> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %outbuf
+  ret void
+}
+
+; AVX: sext_2
+; AVX: vpmovsxbd
+; AVX: ret
+define void @sext_2(<4 x i8>* %inbuf, <4 x i32>* %outbuf) {
+  %v0 = load <4 x i8>* %inbuf
+  %r = sext <4 x i8> %v0 to <4 x i32>
+  store <4 x i32> %r, <4 x i32>* %outbuf
+  ret void
+}
+
+; AVX: sext_3
+; AVX: vpmovsxwd
+; AVX: ret
+define void @sext_3(<4 x i16>* %inbuf, <4 x i32>* %outbuf) {
+  %v0 = load <4 x i16>* %inbuf
+  %r = sext <4 x i16> %v0 to <4 x i32>
+  store <4 x i32> %r, <4 x i32>* %outbuf
+  ret void
+}
+
+; AVX: sext_4
+; AVX: vpmovsxwd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define void @sext_4(<4 x i16>* %inbuf, <4 x i64>* %outbuf) {
+  %v0 = load <4 x i16>* %inbuf
+  %r = sext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %outbuf
+  ret void
+}
+
+; AVX: sext_5
+; AVX: vpmovsxbw
+; AVX: vpmovsxwd
+; AVX: vpmovsxwd
+; AVX: vpmovsxdq
+; AVX: ret
+define void @sext_5(<8 x i8>* %inbuf, <8 x i64>* %outbuf) {
+  %v0 = load <8 x i8>* %inbuf
+  %r = sext <8 x i8> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %outbuf
+  ret void
+}
+; AVX: sext_6
+; AVX: vpmovsxbw
+; AVX: vpmovsxwd
+; AVX: vpmovsxwd
+; AVX: ret
+define void @sext_6(<8 x i8>* %inbuf, <8 x i32>* %outbuf) {
+  %v0 = load <8 x i8>* %inbuf
+  %r = sext <8 x i8> %v0 to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* %outbuf
+  ret void
+}
+
@@ -107,3 +107,15 @@ define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
  %Y = sext <8 x i8> %X to <8 x i32>
  ret <8 x i32>%Y
 }
+
+; CHECK: load_sext_test6
+; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: vpmovsxdq
+; CHECK: vpmovsxdq
+; CHECK: ret
+define <8 x i64> @load_sext_test6(<8 x i8> *%ptr) {
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i64>
+ ret <8 x i64>%Y
+}
+
Loading…
Reference in New Issue
Block a user