mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-09-28 07:17:32 +00:00
Added an x86 DAG combine to increase the chances of using a
movq for v2i64 on x86-32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68368 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -8061,15 +8061,43 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
|
|||||||
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
|
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
|
||||||
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
|
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
|
||||||
/// if the load addresses are consecutive, non-overlapping, and in the right
|
/// if the load addresses are consecutive, non-overlapping, and in the right
|
||||||
/// order.
|
/// order. In the case of v2i64, it will see if it can rewrite the
|
||||||
|
/// shuffle to be an appropriate build vector so it can take advantage of
|
||||||
|
/// PerformBuildVectorCombine.
|
||||||
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
const TargetLowering &TLI) {
|
const TargetLowering &TLI) {
|
||||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
|
||||||
DebugLoc dl = N->getDebugLoc();
|
DebugLoc dl = N->getDebugLoc();
|
||||||
MVT VT = N->getValueType(0);
|
MVT VT = N->getValueType(0);
|
||||||
MVT EVT = VT.getVectorElementType();
|
MVT EVT = VT.getVectorElementType();
|
||||||
SDValue PermMask = N->getOperand(2);
|
SDValue PermMask = N->getOperand(2);
|
||||||
unsigned NumElems = PermMask.getNumOperands();
|
unsigned NumElems = PermMask.getNumOperands();
|
||||||
|
|
||||||
|
// For x86-32 machines, if we see an insert and then a shuffle in a v2i64
|
||||||
|
// where the upper half is 0, it is advantageous to rewrite it as a build
|
||||||
|
// vector of (0, val) so it can use movq.
|
||||||
|
if (VT == MVT::v2i64) {
|
||||||
|
SDValue In[2];
|
||||||
|
In[0] = N->getOperand(0);
|
||||||
|
In[1] = N->getOperand(1);
|
||||||
|
unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue();
|
||||||
|
unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue();
|
||||||
|
if (In[0].getValueType().getVectorNumElements() == NumElems &&
|
||||||
|
In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
|
||||||
|
In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
|
||||||
|
ConstantSDNode* InsertVecIdx =
|
||||||
|
dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
|
||||||
|
if (InsertVecIdx &&
|
||||||
|
InsertVecIdx->getZExtValue() == (Idx0 % 2) &&
|
||||||
|
isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
|
||||||
|
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
|
||||||
|
In[Idx0/2].getOperand(1),
|
||||||
|
In[Idx1/2].getOperand(Idx1 % 2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to combine a vector_shuffle into a 128-bit load.
|
||||||
|
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||||
SDNode *Base = NULL;
|
SDNode *Base = NULL;
|
||||||
if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
|
if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
|
||||||
DAG, MFI, TLI))
|
DAG, MFI, TLI))
|
||||||
|
22
test/CodeGen/X86/vec_i64.ll
Normal file
22
test/CodeGen/X86/vec_i64.ll
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||||
|
; RUN: grep movq %t | count 2
|
||||||
|
|
||||||
|
; Use movq to load an i64 into a v2i64 when the top i64 is 0.
|
||||||
|
|
||||||
|
; foo1: loads an i64 from %y, inserts it into element 0 of an undef <2 x i64>,
; then shuffles with a zero vector using mask <2, 1>, so element 0 of the result
; is the loaded value and element 1 is taken from zeroinitializer.
define <2 x i64> @foo1(i64* %y) nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp1 = load i64* %y, align 8 ; <i64> [#uses=1]
|
||||||
|
%s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
|
||||||
|
%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
|
||||||
|
ret <2 x i64> %loadl
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; foo2: same insert-then-shuffle pattern as a <2 x i64> whose element 0 is the
; i64 loaded from %p and whose element 1 comes from zeroinitializer, but the
; result is bitcast to <4 x float> before being returned.
define <4 x float> @foo2(i64* %p) nounwind {
|
||||||
|
entry:
|
||||||
|
%load = load i64* %p
|
||||||
|
%s2v = insertelement <2 x i64> undef, i64 %load, i32 0
|
||||||
|
%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
|
||||||
|
%0 = bitcast <2 x i64> %loadl to <4 x float>
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
Reference in New Issue
Block a user