mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-23 02:32:11 +00:00
[x86] Inline an SSE4 helper function for INSERT_VECTOR_ELT lowering, no
functionality changed. Separating this into two functions wasn't helping. There was a decent amount of boilerplate duplicated, and some subsequent refactorings here will pull even more common code out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216644 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
da3a293313
commit
1201cc156f
@ -10653,59 +10653,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Lower an INSERT_VECTOR_ELT node on a 128-bit vector using the SSE4.1
/// insertion nodes.
///
/// \param Op  The INSERT_VECTOR_ELT node; operand 0 is the vector, operand 1
///            the scalar being inserted and operand 2 the lane index.
/// \param DAG The DAG used to build any replacement nodes.
/// \returns   A PINSRB/PINSRW/INSERTPS node, \p Op itself for i32/i64
///            elements, or an empty SDValue when this routine does not handle
///            the node (non-128-bit type, non-constant index, or any other
///            element type).
static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  SDLoc dl(Op);

  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  // Only 128-bit vectors are handled here.
  if (!VT.is128BitVector())
    return SDValue();

  unsigned EltBits = EltVT.getSizeInBits();

  // Every lowering below requires the lane index to be a constant.
  if (!isa<ConstantSDNode>(Idx))
    return SDValue();

  if (EltBits == 8 || EltBits == 16) {
    // Word inserts use PINSRW; the only other type expected is v16i8.
    unsigned Opc = X86ISD::PINSRW;
    if (VT != MVT::v8i16) {
      assert(VT == MVT::v16i8);
      Opc = X86ISD::PINSRB;
    }

    // pinsr{b,w} expects a GR32 as its second argument, so widen the scalar
    // and rebuild the index constant when they are not already i32.
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Elt);
    if (Idx.getValueType() != MVT::i32)
      Idx = DAG.getIntPtrConstant(cast<ConstantSDNode>(Idx)->getZExtValue());
    return DAG.getNode(Opc, dl, VT, Vec, Elt, Idx);
  }

  if (EltVT == MVT::f32) {
    // Layout of the INSERTPS immediate:
    //   [7:6] source select. Always zero here; the DAG Combiner may later
    //         fold an extract_elt index into it, e.g.
    //         (insert (extract, 3), 2) puts the '3' into these bits.
    //   [5:4] destination select. This is the incoming lane index.
    //   [3:0] zero mask. The DAG Combiner may fold a bitwise AND or an
    //         insert of float 0.0 into these bits.
    Idx = DAG.getIntPtrConstant(cast<ConstantSDNode>(Idx)->getZExtValue() << 4);
    // The inserted scalar is passed as a v4f32 built with SCALAR_TO_VECTOR.
    Elt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Elt);
    return DAG.getNode(X86ISD::INSERTPS, dl, VT, Vec, Elt, Idx);
  }

  // PINSR* works with a constant index, so the node is returned unchanged.
  if (EltVT == MVT::i32 || EltVT == MVT::i64)
    return Op;

  return SDValue();
}
|
||||
|
||||
/// Insert one bit to mask vector, like v16i1 or v8i1.
|
||||
/// AVX-512 feature.
|
||||
SDValue
|
||||
@ -10744,7 +10691,7 @@ SDValue
|
||||
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
|
||||
|
||||
if (EltVT == MVT::i1)
|
||||
return InsertBitToMaskVector(Op, DAG);
|
||||
|
||||
@ -10753,8 +10700,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
SDValue N2 = Op.getOperand(2);
|
||||
|
||||
// If this is a 256-bit vector result, first extract the 128-bit vector,
|
||||
// insert the element into the extracted half and then place it back.
|
||||
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
|
||||
// into that, and then insert the subvector back into the result.
|
||||
if (VT.is256BitVector() || VT.is512BitVector()) {
|
||||
if (!isa<ConstantSDNode>(N2))
|
||||
return SDValue();
|
||||
@ -10774,8 +10721,48 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
||||
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE41())
|
||||
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
|
||||
if (Subtarget->hasSSE41()) {
|
||||
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
|
||||
isa<ConstantSDNode>(N2)) {
|
||||
unsigned Opc;
|
||||
if (VT == MVT::v8i16) {
|
||||
Opc = X86ISD::PINSRW;
|
||||
} else {
|
||||
assert(VT == MVT::v16i8);
|
||||
Opc = X86ISD::PINSRB;
|
||||
}
|
||||
|
||||
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
|
||||
// argument.
|
||||
if (N1.getValueType() != MVT::i32)
|
||||
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
|
||||
if (N2.getValueType() != MVT::i32)
|
||||
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
|
||||
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
|
||||
}
|
||||
|
||||
if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
|
||||
// Bits [7:6] of the constant are the source select. This will always be
|
||||
// zero here. The DAG Combiner may combine an extract_elt index into
|
||||
// these
|
||||
// bits. For example (insert (extract, 3), 2) could be matched by
|
||||
// putting
|
||||
// the '3' into bits [7:6] of X86ISD::INSERTPS.
|
||||
// Bits [5:4] of the constant are the destination select. This is the
|
||||
// value of the incoming immediate.
|
||||
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
|
||||
// combine either bitwise AND or insert of float 0.0 to set these bits.
|
||||
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
|
||||
// Wrap the scalar in a v4f32 using SCALAR_TO_VECTOR.
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
|
||||
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
|
||||
}
|
||||
|
||||
if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
|
||||
// PINSR* works with constant index.
|
||||
return Op;
|
||||
}
|
||||
}
|
||||
|
||||
if (EltVT == MVT::i8)
|
||||
return SDValue();
|
||||
|
Loading…
x
Reference in New Issue
Block a user