mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 06:32:24 +00:00
SelectionDAG shuffle nodes do not allow operands with different numbers of
elements than the result vector type. So, when an instruction like: %8 = shufflevector <2 x float> %4, <2 x float> %7, <4 x i32> <i32 1, i32 0, i32 3, i32 2> is translated to a DAG, each operand is changed to a concat_vectors node that appends 2 undef elements. That is: shuffle [a,b], [c,d] is changed to: shuffle [a,b,u,u], [c,d,u,u] That's probably the right thing for x86 but for NEON, we'd much rather have: shuffle [a,b,c,d], undef Teach the DAG combiner how to do that transformation for ARM. Radar 8597007. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117482 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bc82d8b84f
commit
f20700ca77
@ -460,6 +460,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||||
setTargetDAGCombine(ISD::SELECT_CC);
|
setTargetDAGCombine(ISD::SELECT_CC);
|
||||||
setTargetDAGCombine(ISD::BUILD_VECTOR);
|
setTargetDAGCombine(ISD::BUILD_VECTOR);
|
||||||
|
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
computeRegisterProperties();
|
computeRegisterProperties();
|
||||||
@ -4531,6 +4532,59 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
|
||||||
|
/// ISD::VECTOR_SHUFFLE.
|
||||||
|
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
|
||||||
|
// The LLVM shufflevector instruction does not require the shuffle mask
|
||||||
|
// length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
|
||||||
|
// have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
|
||||||
|
// operands do not match the mask length, they are extended by concatenating
|
||||||
|
// them with undef vectors. That is probably the right thing for other
|
||||||
|
// targets, but for NEON it is better to concatenate two double-register
|
||||||
|
// size vector operands into a single quad-register size vector. Do that
|
||||||
|
// transformation here:
|
||||||
|
// shuffle(concat(v1, undef), concat(v2, undef)) ->
|
||||||
|
// shuffle(concat(v1, v2), undef)
|
||||||
|
SDValue Op0 = N->getOperand(0);
|
||||||
|
SDValue Op1 = N->getOperand(1);
|
||||||
|
if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
|
||||||
|
Op1.getOpcode() != ISD::CONCAT_VECTORS ||
|
||||||
|
Op0.getNumOperands() != 2 ||
|
||||||
|
Op1.getNumOperands() != 2)
|
||||||
|
return SDValue();
|
||||||
|
SDValue Concat0Op1 = Op0.getOperand(1);
|
||||||
|
SDValue Concat1Op1 = Op1.getOperand(1);
|
||||||
|
if (Concat0Op1.getOpcode() != ISD::UNDEF ||
|
||||||
|
Concat1Op1.getOpcode() != ISD::UNDEF)
|
||||||
|
return SDValue();
|
||||||
|
// Skip the transformation if any of the types are illegal.
|
||||||
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
if (!TLI.isTypeLegal(VT) ||
|
||||||
|
!TLI.isTypeLegal(Concat0Op1.getValueType()) ||
|
||||||
|
!TLI.isTypeLegal(Concat1Op1.getValueType()))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
|
||||||
|
Op0.getOperand(0), Op1.getOperand(0));
|
||||||
|
// Translate the shuffle mask.
|
||||||
|
SmallVector<int, 16> NewMask;
|
||||||
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
|
unsigned HalfElts = NumElts/2;
|
||||||
|
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
|
||||||
|
for (unsigned n = 0; n < NumElts; ++n) {
|
||||||
|
int MaskElt = SVN->getMaskElt(n);
|
||||||
|
int NewElt = -1;
|
||||||
|
if (MaskElt < HalfElts)
|
||||||
|
NewElt = MaskElt;
|
||||||
|
else if (MaskElt >= NumElts && MaskElt < NumElts + HalfElts)
|
||||||
|
NewElt = HalfElts + MaskElt - NumElts;
|
||||||
|
NewMask.push_back(NewElt);
|
||||||
|
}
|
||||||
|
return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
|
||||||
|
DAG.getUNDEF(VT), NewMask.data());
|
||||||
|
}
|
||||||
|
|
||||||
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
|
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
|
||||||
/// ARMISD::VDUPLANE.
|
/// ARMISD::VDUPLANE.
|
||||||
static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) {
|
static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) {
|
||||||
@ -4939,6 +4993,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
|
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
|
||||||
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
|
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
|
||||||
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
|
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
|
||||||
|
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
|
||||||
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG);
|
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG);
|
||||||
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
|
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
|
||||||
case ISD::SHL:
|
case ISD::SHL:
|
||||||
|
@ -129,3 +129,21 @@ define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
|
|||||||
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
|
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
|
||||||
ret <8 x i16> %tmp2
|
ret <8 x i16> %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; A vcombine feeding a VREV should not obscure things. Radar 8597007.
|
||||||
|
|
||||||
|
define void @test_with_vcombine(<4 x float>* %v) nounwind {
;CHECK: test_with_vcombine:
;CHECK-NOT: vext
;CHECK: vrev64.32
  %tmp1 = load <4 x float>* %v, align 16
  %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
  %tmp3 = extractelement <2 x double> %tmp2, i32 0
  %tmp4 = bitcast double %tmp3 to <2 x float>
  %tmp5 = extractelement <2 x double> %tmp2, i32 1
  %tmp6 = bitcast double %tmp5 to <2 x float>
  %tmp7 = fadd <2 x float> %tmp6, %tmp6
  %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x float> %tmp8, <4 x float>* %v, align 16
  ret void
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user