mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
Add a DAGCombine for transforming 128->256 casts into a simple
vxorps + vinsertf128 pair of instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135727 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d088834fb9
commit
74dad551d8
@ -11232,23 +11232,77 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
|
||||
return TargetLowering::isGAPlusOffset(N, GA, Offset);
|
||||
}
|
||||
|
||||
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
|
||||
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
|
||||
/// if the load addresses are consecutive, non-overlapping, and in the right
|
||||
/// order.
|
||||
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
|
||||
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
||||
SDValue V1 = SVOp->getOperand(0);
|
||||
SDValue V2 = SVOp->getOperand(1);
|
||||
EVT VT = SVOp->getValueType(0);
|
||||
|
||||
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
|
||||
V2.getOpcode() == ISD::CONCAT_VECTORS) {
|
||||
//
|
||||
// 0,0,0,...
|
||||
// \
|
||||
// V UNDEF BUILD_VECTOR UNDEF
|
||||
// \ / \ /
|
||||
// CONCAT_VECTOR CONCAT_VECTOR
|
||||
// \ /
|
||||
// \ /
|
||||
// RESULT: V + zero extended
|
||||
//
|
||||
if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
|
||||
V2.getOperand(1).getOpcode() != ISD::UNDEF ||
|
||||
V1.getOperand(1).getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
|
||||
if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
|
||||
return SDValue();
|
||||
|
||||
// To match the shuffle mask, the first half of the mask should
|
||||
// be exactly the first vector, and all the rest a splat with the
|
||||
// first element of the second one.
|
||||
int NumElems = VT.getVectorNumElements();
|
||||
for (int i = 0; i < NumElems/2; ++i)
|
||||
if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
|
||||
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
|
||||
return SDValue();
|
||||
|
||||
// Emit a zeroed vector and insert the desired subvector on its
|
||||
// first half.
|
||||
SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
|
||||
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
|
||||
DAG.getConstant(0, MVT::i32), DAG, dl);
|
||||
return DCI.CombineTo(N, InsV);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformShuffleCombine - Performs several different shuffle combines.
|
||||
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (VT.getSizeInBits() != 128)
|
||||
return SDValue();
|
||||
|
||||
// Don't create instructions with illegal types after legalize types has run.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
|
||||
return SDValue();
|
||||
|
||||
// Only handle pure VECTOR_SHUFFLE nodes.
|
||||
if (VT.getSizeInBits() == 256 && N->getOpcode() == ISD::VECTOR_SHUFFLE)
|
||||
return PerformShuffleCombine256(N, DAG, DCI);
|
||||
|
||||
// Only handle 128 wide vector from here on.
|
||||
if (VT.getSizeInBits() != 128)
|
||||
return SDValue();
|
||||
|
||||
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
|
||||
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
|
||||
// consecutive, non-overlapping, and in the right order.
|
||||
SmallVector<SDValue, 16> Elts;
|
||||
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
|
||||
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
|
||||
|
26
test/CodeGen/X86/avx-cast.ll
Normal file
26
test/CodeGen/X86/avx-cast.ll
Normal file
@ -0,0 +1,26 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; CHECK: vxorps
|
||||
; CHECK-NEXT: vinsertf128 $0
|
||||
; castA: widen <4 x float> to <8 x float>. Mask lanes 0-3 select %m in order;
; lanes 4-7 all select index 4, which is element 0 of the zeroinitializer
; operand, so the upper half of the result is zero.
define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
entry:
  %widened = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %widened
}
|
||||
|
||||
; CHECK: vxorps
|
||||
; CHECK-NEXT: vinsertf128 $0
|
||||
; castB: widen <2 x double> to <4 x double>. Mask lanes 0-1 select %m in order;
; lanes 2-3 both select index 2, which is element 0 of the zeroinitializer
; operand, so the upper half of the result is zero.
define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
entry:
  %widened = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x double> %widened
}
|
||||
|
||||
; CHECK: vxorps
|
||||
; CHECK-NEXT: vinsertf128 $0
|
||||
; castC: widen <2 x i64> to <4 x i64>. Mask lanes 0-1 select %m in order;
; lanes 2-3 both select index 2, which is element 0 of the zeroinitializer
; operand, so the upper half of the result is zero.
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
entry:
  %widened = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x i64> %widened
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user