mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 07:17:36 +00:00
[X86][SSE] Sign extension for target vector sizes less than 128 bits (pt1)
This patch adds support for sign extension to vector types smaller than 128 bits, such as v2i32. It concatenates the input with UNDEF subvectors so that the sign-extended result is 128 bits wide (e.g. v4i32), performs the sign extension, and then extracts the target subvector. Patch 1/2 of D10589 - the second patch covers the conversion of v2i8/v2i16 to v2f64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -24233,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VT.isVector()) {
|
if (VT.isVector() && Subtarget->hasSSE2()) {
|
||||||
auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
|
auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
|
||||||
EVT InVT = N.getValueType();
|
EVT InVT = N.getValueType();
|
||||||
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
|
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
|
||||||
128 / InVT.getScalarSizeInBits());
|
Size / InVT.getScalarSizeInBits());
|
||||||
SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
|
SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
|
||||||
DAG.getUNDEF(InVT));
|
DAG.getUNDEF(InVT));
|
||||||
Opnds[0] = N;
|
Opnds[0] = N;
|
||||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
|
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// If target-size is less than 128-bits, extend to a type that would extend
|
||||||
|
// to 128 bits, extend that and extract the original target vector.
|
||||||
|
if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
|
||||||
|
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
|
||||||
|
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
|
||||||
|
unsigned Scale = 128 / VT.getSizeInBits();
|
||||||
|
EVT ExVT =
|
||||||
|
EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
|
||||||
|
SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
|
||||||
|
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
|
||||||
|
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
|
||||||
|
DAG.getIntPtrConstant(0, DL));
|
||||||
|
}
|
||||||
|
|
||||||
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
|
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
|
||||||
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
|
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
|
||||||
if (VT.getSizeInBits() == 128 &&
|
if (VT.getSizeInBits() == 128 &&
|
||||||
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
|
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
|
||||||
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
|
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
|
||||||
SDValue ExOp = ExtendToVec128(DL, N0);
|
SDValue ExOp = ExtendVecSize(DL, N0, 128);
|
||||||
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
|
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24268,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
++i, Offset += NumSubElts) {
|
++i, Offset += NumSubElts) {
|
||||||
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
||||||
DAG.getIntPtrConstant(Offset, DL));
|
DAG.getIntPtrConstant(Offset, DL));
|
||||||
SrcVec = ExtendToVec128(DL, SrcVec);
|
SrcVec = ExtendVecSize(DL, SrcVec, 128);
|
||||||
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
|
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
|
||||||
Opnds.push_back(SrcVec);
|
Opnds.push_back(SrcVec);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,6 +117,46 @@ entry:
|
|||||||
ret <4 x i64>%B
|
ret <4 x i64>%B
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
|
||||||
|
; SSE2-LABEL: sext_2i8_to_i32:
|
||||||
|
; SSE2: # BB#0: # %entry
|
||||||
|
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||||
|
; SSE2-NEXT: psraw $8, %xmm0
|
||||||
|
; SSE2-NEXT: movd %xmm0, %eax
|
||||||
|
; SSE2-NEXT: retq
|
||||||
|
;
|
||||||
|
; SSSE3-LABEL: sext_2i8_to_i32:
|
||||||
|
; SSSE3: # BB#0: # %entry
|
||||||
|
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||||
|
; SSSE3-NEXT: psraw $8, %xmm0
|
||||||
|
; SSSE3-NEXT: movd %xmm0, %eax
|
||||||
|
; SSSE3-NEXT: retq
|
||||||
|
;
|
||||||
|
; SSE41-LABEL: sext_2i8_to_i32:
|
||||||
|
; SSE41: # BB#0: # %entry
|
||||||
|
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
|
||||||
|
; SSE41-NEXT: movd %xmm0, %eax
|
||||||
|
; SSE41-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX-LABEL: sext_2i8_to_i32:
|
||||||
|
; AVX: # BB#0: # %entry
|
||||||
|
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: vmovd %xmm0, %eax
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-SSE41-LABEL: sext_2i8_to_i32:
|
||||||
|
; X32-SSE41: # BB#0: # %entry
|
||||||
|
; X32-SSE41: pmovsxbw %xmm0, %xmm0
|
||||||
|
; X32-SSE41-NEXT: movd %xmm0, %eax
|
||||||
|
; X32-SSE41-NEXT: popl %edx
|
||||||
|
; X32-SSE41-NEXT: retl
|
||||||
|
entry:
|
||||||
|
%Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||||
|
%Ex = sext <2 x i8> %Shuf to <2 x i16>
|
||||||
|
%Bc = bitcast <2 x i16> %Ex to i32
|
||||||
|
ret i32 %Bc
|
||||||
|
}
|
||||||
|
|
||||||
define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
|
define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
|
||||||
; SSE2-LABEL: load_sext_test1:
|
; SSE2-LABEL: load_sext_test1:
|
||||||
; SSE2: # BB#0: # %entry
|
; SSE2: # BB#0: # %entry
|
||||||
|
|||||||
Reference in New Issue
Block a user