mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	[X86][SSE] Sign extension for target vector sizes less than 128 bits (pt1)
This patch adds support for sign extension to sub-128-bit vector types, such as v2i32. It concatenates the source vector with UNDEF subvectors to widen it so that the extended result is 128 bits, performs the sign extension at that width (e.g. as v4i32), and then extracts the target subvector. Patch 1/2 of D10589 - the second patch covers the conversion of v2i8/v2i16 to v2f64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -24233,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, | ||||
|     return SDValue(); | ||||
|   } | ||||
| 
 | ||||
|   if (VT.isVector()) { | ||||
|     auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) { | ||||
|   if (VT.isVector() && Subtarget->hasSSE2()) { | ||||
|     auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) { | ||||
|       EVT InVT = N.getValueType(); | ||||
|       EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), | ||||
|                                    128 / InVT.getScalarSizeInBits()); | ||||
|       SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(), | ||||
|                                    Size / InVT.getScalarSizeInBits()); | ||||
|       SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(), | ||||
|                                     DAG.getUNDEF(InVT)); | ||||
|       Opnds[0] = N; | ||||
|       return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds); | ||||
|     }; | ||||
| 
 | ||||
|     // If target-size is less than 128-bits, extend to a type that would extend
 | ||||
|     // to 128 bits, extend that and extract the original target vector.
 | ||||
|     if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) && | ||||
|         (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && | ||||
|         (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { | ||||
|       unsigned Scale = 128 / VT.getSizeInBits(); | ||||
|       EVT ExVT = | ||||
|           EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); | ||||
|       SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); | ||||
|       SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex); | ||||
|       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, | ||||
|                          DAG.getIntPtrConstant(0, DL)); | ||||
|     } | ||||
| 
 | ||||
|     // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
 | ||||
|     // which ensures lowering to X86ISD::VSEXT (pmovsx*).
 | ||||
|     if (VT.getSizeInBits() == 128 && | ||||
|         (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && | ||||
|         (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { | ||||
|       SDValue ExOp = ExtendToVec128(DL, N0); | ||||
|       SDValue ExOp = ExtendVecSize(DL, N0, 128); | ||||
|       return DAG.getSignExtendVectorInReg(ExOp, DL, VT); | ||||
|     } | ||||
| 
 | ||||
| @@ -24268,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, | ||||
|            ++i, Offset += NumSubElts) { | ||||
|         SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, | ||||
|                                      DAG.getIntPtrConstant(Offset, DL)); | ||||
|         SrcVec = ExtendToVec128(DL, SrcVec); | ||||
|         SrcVec = ExtendVecSize(DL, SrcVec, 128); | ||||
|         SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT); | ||||
|         Opnds.push_back(SrcVec); | ||||
|       } | ||||
|   | ||||
| @@ -117,6 +117,46 @@ entry: | ||||
|   ret <4 x i64>%B | ||||
| } | ||||
|  | ||||
| define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { | ||||
| ; SSE2-LABEL: sext_2i8_to_i32: | ||||
| ; SSE2:       # BB#0: # %entry | ||||
| ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | ||||
| ; SSE2-NEXT:    psraw $8, %xmm0 | ||||
| ; SSE2-NEXT:    movd %xmm0, %eax | ||||
| ; SSE2-NEXT:    retq | ||||
| ; | ||||
| ; SSSE3-LABEL: sext_2i8_to_i32: | ||||
| ; SSSE3:       # BB#0: # %entry | ||||
| ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | ||||
| ; SSSE3-NEXT:    psraw $8, %xmm0 | ||||
| ; SSSE3-NEXT:    movd %xmm0, %eax | ||||
| ; SSSE3-NEXT:    retq | ||||
| ; | ||||
| ; SSE41-LABEL: sext_2i8_to_i32: | ||||
| ; SSE41:       # BB#0: # %entry | ||||
| ; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0 | ||||
| ; SSE41-NEXT:    movd %xmm0, %eax | ||||
| ; SSE41-NEXT:    retq | ||||
| ; | ||||
| ; AVX-LABEL: sext_2i8_to_i32: | ||||
| ; AVX:       # BB#0: # %entry | ||||
| ; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0 | ||||
| ; AVX-NEXT:    vmovd %xmm0, %eax | ||||
| ; AVX-NEXT:    retq | ||||
| ; | ||||
| ; X32-SSE41-LABEL: sext_2i8_to_i32: | ||||
| ; X32-SSE41:       # BB#0: # %entry | ||||
| ; X32-SSE41:         pmovsxbw %xmm0, %xmm0 | ||||
| ; X32-SSE41-NEXT:    movd %xmm0, %eax | ||||
| ; X32-SSE41-NEXT:    popl %edx | ||||
| ; X32-SSE41-NEXT:    retl | ||||
| entry: | ||||
|   %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> | ||||
|   %Ex = sext <2 x i8> %Shuf to <2 x i16> | ||||
|   %Bc = bitcast <2 x i16> %Ex to i32 | ||||
|   ret i32 %Bc | ||||
| } | ||||
|  | ||||
| define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) { | ||||
| ; SSE2-LABEL: load_sext_test1: | ||||
| ; SSE2:       # BB#0: # %entry | ||||
|   | ||||
		Reference in New Issue
	
	Block a user