mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-07-21 02:29:22 +00:00
Lower certain build_vectors to insertps instructions
Summary: Vectors built from zeros and from elements taken, in the same order, from another (source) vector are now built using a single insertps instruction. The same optimization applies when one element of a vector is moved to a different position in that vector while some of the other elements are zeroed out. Further optimizations are possible; they are described in TODO comments, and I will be implementing at least some of them in the near future. Added tests for the different cases where this optimization triggers. Reviewers: nadav, delena, craig.topper Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208271 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
52298507e8
commit
b19c087aa7
@ -5437,6 +5437,74 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
|
|||||||
return V;
|
return V;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
|
||||||
|
static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
|
||||||
|
unsigned NonZeros, unsigned NumNonZero,
|
||||||
|
unsigned NumZero, SelectionDAG &DAG,
|
||||||
|
const X86Subtarget *Subtarget,
|
||||||
|
const TargetLowering &TLI) {
|
||||||
|
// We know there's at least one non-zero element
|
||||||
|
unsigned FirstNonZeroIdx = 0;
|
||||||
|
SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
|
||||||
|
while (FirstNonZero.getOpcode() == ISD::UNDEF ||
|
||||||
|
X86::isZeroNode(FirstNonZero)) {
|
||||||
|
++FirstNonZeroIdx;
|
||||||
|
FirstNonZero = Op->getOperand(FirstNonZeroIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
|
||||||
|
!isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue V = FirstNonZero.getOperand(0);
|
||||||
|
unsigned FirstNonZeroDst = cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
|
||||||
|
unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
|
||||||
|
unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
|
||||||
|
unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
|
||||||
|
|
||||||
|
for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
|
||||||
|
SDValue Elem = Op.getOperand(Idx);
|
||||||
|
if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// TODO: What else can be here? Deal with it.
|
||||||
|
if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// TODO: Some optimizations are still possible here
|
||||||
|
// ex: Getting one element from a vector, and the rest from another.
|
||||||
|
if (Elem.getOperand(0) != V)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
|
||||||
|
if (Dst == Idx)
|
||||||
|
++CorrectIdx;
|
||||||
|
else if (IncorrectIdx == -1U) {
|
||||||
|
IncorrectIdx = Idx;
|
||||||
|
IncorrectDst = Dst;
|
||||||
|
} else
|
||||||
|
// There was already one element with an incorrect index.
|
||||||
|
// We can't optimize this case to an insertps.
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
|
||||||
|
SDLoc dl(Op);
|
||||||
|
EVT VT = Op.getSimpleValueType();
|
||||||
|
unsigned ElementMoveMask = 0;
|
||||||
|
if (IncorrectIdx == -1U)
|
||||||
|
ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
|
||||||
|
else
|
||||||
|
ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
|
||||||
|
|
||||||
|
SDValue InsertpsMask = DAG.getIntPtrConstant(
|
||||||
|
ElementMoveMask | (~NonZeros & 0xf));
|
||||||
|
return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
/// getVShift - Return a vector logical shift node.
|
/// getVShift - Return a vector logical shift node.
|
||||||
///
|
///
|
||||||
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
|
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
|
||||||
@ -6187,6 +6255,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
if (V.getNode()) return V;
|
if (V.getNode()) return V;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
|
||||||
|
if (EVTBits == 32 && NumElems == 4) {
|
||||||
|
SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
|
||||||
|
NumZero, DAG, Subtarget, *this);
|
||||||
|
if (V.getNode())
|
||||||
|
return V;
|
||||||
|
}
|
||||||
|
|
||||||
// If element VT is == 32 bits, turn it into a number of shuffles.
|
// If element VT is == 32 bits, turn it into a number of shuffles.
|
||||||
SmallVector<SDValue, 8> V(NumElems);
|
SmallVector<SDValue, 8> V(NumElems);
|
||||||
if (NumElems == 4 && NumZero > 0) {
|
if (NumElems == 4 && NumZero > 0) {
|
||||||
|
@ -320,3 +320,259 @@ define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
|
|||||||
%result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
|
%result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
|
||||||
ret <4 x i32> %result
|
ret <4 x i32> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;; Shuffles optimizable with a single insertps instruction
|
||||||
|
; Builds <x[0], x[1], x[2], 0.0>. Lanes 0-2 keep their position and only lane 3
; is zeroed, hence the expected immediate 8 (zero-mask bit 3).
define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_XYZ0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $8
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %v1 = insertelement <4 x float> %v0, float %x1, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %v2 = insertelement <4 x float> %v1, float %x2, i32 2
  %v3 = insertelement <4 x float> %v2, float 0.0, i32 3
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], x[1], 0.0, 0.0>. Lanes 2 and 3 are zeroed, hence the expected
; immediate 12 (zero-mask bits 2 and 3).
define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_XY00:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $12
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %v1 = insertelement <4 x float> %v0, float %x1, i32 1
  %v2 = insertelement <4 x float> %v1, float 0.0, i32 2
  %v3 = insertelement <4 x float> %v2, float 0.0, i32 3
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], x[1], x[1], 0.0>. Element 1 is duplicated into lane 2 and lane 3
; is zeroed: immediate 104 = source lane 1, destination lane 2, zero mask 8.
define <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_XYY0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $104
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %v1 = insertelement <4 x float> %v0, float %x1, i32 1
  %v2 = insertelement <4 x float> %v1, float %x1, i32 2
  %v3 = insertelement <4 x float> %v2, float 0.0, i32 3
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], x[1], x[3], 0.0>. Element 3 is moved into lane 2 and lane 3 is
; zeroed: immediate 232 = source lane 3, destination lane 2, zero mask 8.
; The negative patterns match the ones used by the sibling single-source tests
; and by the i32 twin of this test, so a regression to an extract/unpack
; sequence ahead of the insertps is caught here too.
define <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_XYW0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $232
; CHECK: ret
  %vecext = extractelement <4 x float> %x, i32 0
  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
  %vecext1 = extractelement <4 x float> %x, i32 1
  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
  %vecext2 = extractelement <4 x float> %x, i32 3
  %vecinit3 = insertelement <4 x float> %vecinit2, float %vecext2, i32 2
  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
  ret <4 x float> %vecinit4
}
|
||||||
|
|
||||||
|
; Builds <x[3], 0.0, 0.0, x[3]>. Element 3 is copied into lane 0 while lanes 1
; and 2 are zeroed: immediate 198 = source lane 3, destination lane 0, zero
; mask 6.
define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_W00W:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $198
; CHECK: ret
  %x3 = extractelement <4 x float> %x, i32 3
  %v0 = insertelement <4 x float> undef, float %x3, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
  %v2 = insertelement <4 x float> %v1, float 0.0, i32 2
  %v3 = insertelement <4 x float> %v2, float %x3, i32 3
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], 0.0, 0.0, a[0]>: the last lane comes from the second argument.
; Expected immediate 48 = source lane 0, destination lane 3, no lanes zeroed by
; the insertps itself.
define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_X00A:
; CHECK-NOT: movaps
; CHECK-NOT: shufps
; CHECK: insertps $48
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
  %v2 = insertelement <4 x float> %v1, float 0.0, i32 2
  %v3 = shufflevector <4 x float> %v2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], 0.0, 0.0, x[0]>: same shape as shuf_X00A, but the last lane is
; taken from %x again through the shuffle. Expected immediate 48 = source
; lane 0, destination lane 3.
define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_X00X:
; CHECK-NOT: movaps
; CHECK-NOT: shufps
; CHECK: insertps $48
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
  %v2 = insertelement <4 x float> %v1, float 0.0, i32 2
  %v3 = shufflevector <4 x float> %v2, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Builds <x[0], 0.0, x[1], a[2]> through two shuffles. One shufps is expected,
; followed (with no movhlps and no second shufps in between) by an insertps
; with immediate 176 = source lane 2, destination lane 3.
define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
; CHECK-LABEL: shuf_X0YC:
; CHECK: shufps
; CHECK-NOT: movhlps
; CHECK-NOT: shufps
; CHECK: insertps $176
; CHECK: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
  %v2 = shufflevector <4 x float> %v1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
  %v3 = shufflevector <4 x float> %v2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
  ret <4 x float> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_XYZ0: builds <x[0], x[1], x[2], 0> and expects the same
; insertps with immediate 8 (only lane 3 zeroed).
define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_XYZ0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $8
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 0, i32 3
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_XY00: builds <x[0], x[1], 0, 0> and expects insertps
; with immediate 12 (lanes 2 and 3 zeroed).
define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_XY00:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $12
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 0, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 0, i32 3
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_XYY0: builds <x[0], x[1], x[1], 0> and expects insertps
; with immediate 104 = source lane 1, destination lane 2, zero mask 8.
define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_XYY0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $104
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 0, i32 3
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_XYW0: builds <x[0], x[1], x[3], 0> and expects insertps
; with immediate 232 = source lane 3, destination lane 2, zero mask 8.
define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_XYW0:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $232
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %x3 = extractelement <4 x i32> %x, i32 3
  %v2 = insertelement <4 x i32> %v1, i32 %x3, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 0, i32 3
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_W00W: builds <x[3], 0, 0, x[3]> and expects insertps
; with immediate 198 = source lane 3, destination lane 0, zero mask 6.
define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_W00W:
; CHECK-NOT: pextrd
; CHECK-NOT: punpckldq
; CHECK: insertps $198
; CHECK: ret
  %x3 = extractelement <4 x i32> %x, i32 3
  %v0 = insertelement <4 x i32> undef, i32 %x3, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 0, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 0, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_X00A: builds <x[0], 0, 0, a[0]> and expects insertps
; with immediate 48 = source lane 0, destination lane 3.
define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_X00A:
; CHECK-NOT: movaps
; CHECK-NOT: shufps
; CHECK: insertps $48
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 0, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 0, i32 2
  %v3 = shufflevector <4 x i32> %v2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_X00X: builds <x[0], 0, 0, x[0]> and expects insertps
; with immediate 48 = source lane 0, destination lane 3.
define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_X00X:
; CHECK-NOT: movaps
; CHECK-NOT: shufps
; CHECK: insertps $48
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 0, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 0, i32 2
  %v3 = shufflevector <4 x i32> %v2, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
; Integer twin of shuf_X0YC: builds <x[0], 0, x[1], a[2]> through two shuffles.
; One shufps is expected, followed (with no movhlps and no second shufps in
; between) by an insertps with immediate 176 = source lane 2, destination
; lane 3.
define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
; CHECK-LABEL: i32_shuf_X0YC:
; CHECK: shufps
; CHECK-NOT: movhlps
; CHECK-NOT: shufps
; CHECK: insertps $176
; CHECK: ret
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 0, i32 1
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
  %v3 = shufflevector <4 x i32> %v2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
  ret <4 x i32> %v3
}
|
||||||
|
|
||||||
|
;; Test for a bug in the first implementation of LowerBuildVectorv4x32
|
||||||
|
; Builds <x[0], x[1], x[2], 0.0> and then selects, per lane, between that vector
; and %x, so %x stays live after the build. The expected sequence copies %xmm0
; to %xmm1, applies the insertps (immediate 8) to the copy only, and combines
; both registers with maxps.
define <4 x float> @test_insertps_no_undef(<4 x float> %x) {
; CHECK-LABEL: test_insertps_no_undef:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: insertps $8, %xmm1, %xmm1
; CHECK-NEXT: maxps %xmm1, %xmm0
; CHECK-NEXT: ret
  %x0 = extractelement <4 x float> %x, i32 0
  %v0 = insertelement <4 x float> undef, float %x0, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %v1 = insertelement <4 x float> %v0, float %x1, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %v2 = insertelement <4 x float> %v1, float %x2, i32 2
  %v3 = insertelement <4 x float> %v2, float 0.0, i32 3
  %lt = fcmp olt <4 x float> %v3, %x
  %max = select <4 x i1> %lt, <4 x float> %x, <4 x float> %v3
  ret <4 x float> %max
}
|
||||||
|
Loading…
Reference in New Issue
Block a user