mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
Now that we have a canonical way to handle 256-bit splats:
vinsertf128 $1 + vpermilps $0, remove the old code that first performed the splat in a 128-bit vector and then inserted it into a larger one. This is better because the handling code gets simpler, and it also makes room for the upcoming vbroadcast! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137807 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
23e9ef994e
commit
fc0a702128
@ -4205,34 +4205,6 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, V);
|
||||
}
|
||||
|
||||
/// PromoteVectorToScalarSplat - Since there's no native support for
|
||||
/// scalar_to_vector for 256-bit AVX, a 128-bit scalar_to_vector +
|
||||
/// INSERT_SUBVECTOR is generated. Recognize this idiom and do the
|
||||
/// shuffle before the insertion; this yields fewer instructions in the end.
|
||||
static SDValue PromoteVectorToScalarSplat(ShuffleVectorSDNode *SV,
|
||||
SelectionDAG &DAG) {
|
||||
EVT SrcVT = SV->getValueType(0);
|
||||
SDValue V1 = SV->getOperand(0);
|
||||
DebugLoc dl = SV->getDebugLoc();
|
||||
int NumElems = SrcVT.getVectorNumElements();
|
||||
|
||||
assert(SrcVT.is256BitVector() && "unknown howto handle vector type");
|
||||
assert(SV->isSplat() && "shuffle must be a splat");
|
||||
|
||||
int SplatIdx = SV->getSplatIndex();
|
||||
const int Mask[4] = { SplatIdx, SplatIdx, SplatIdx, SplatIdx };
|
||||
|
||||
EVT SVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
|
||||
NumElems/2);
|
||||
SDValue SV1 = DAG.getVectorShuffle(SVT, dl, V1.getOperand(1),
|
||||
DAG.getUNDEF(SVT), Mask);
|
||||
SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), SV1,
|
||||
DAG.getConstant(0, MVT::i32), DAG, dl);
|
||||
|
||||
return Insert128BitVector(InsV, SV1,
|
||||
DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
|
||||
}
|
||||
|
||||
/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32 and
|
||||
/// v8i32, v16i16 or v32i8 to v8f32.
|
||||
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
|
||||
@ -6199,16 +6171,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
|
||||
if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
|
||||
return Op;
|
||||
|
||||
// Since there's no native support for scalar_to_vector for 256-bit AVX, a
|
||||
// 128-bit scalar_to_vector + INSERT_SUBVECTOR is generated. Recognize this
|
||||
// idiom and do the shuffle before the insertion; this yields fewer
|
||||
// instructions in the end.
|
||||
if (VT.is256BitVector() &&
|
||||
V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
V1.getOperand(0).getOpcode() == ISD::UNDEF &&
|
||||
V1.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||
return PromoteVectorToScalarSplat(SVOp, DAG);
|
||||
|
||||
// Handle splats by matching through known shuffle masks
|
||||
if (VT.is128BitVector() && NumElem <= 4)
|
||||
return SDValue();
|
||||
|
@ -24,8 +24,8 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: vmovd
|
||||
; CHECK-NEXT: movlhps
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
; CHECK-NEXT: vpermilps $0
|
||||
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
@ -35,8 +35,8 @@ entry:
|
||||
ret <4 x i64> %vecinit6.i
|
||||
}
|
||||
|
||||
; CHECK: vshufpd
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
; CHECK: vinsertf128 $1
|
||||
; CHECK-NEXT: vpermilps $0
|
||||
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
|
||||
@ -78,8 +78,8 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
|
||||
ret <8 x float> %load_broadcast12281250
|
||||
}
|
||||
|
||||
; CHECK: vpshufd $0
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
; CHECK: vinsertf128 $1
|
||||
; CHECK-NEXT: vpermilps $0
|
||||
define <8 x float> @funcF(i32* %ptr) nounwind {
|
||||
%val = load i32* %ptr, align 4
|
||||
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
|
||||
|
Loading…
Reference in New Issue
Block a user