Add a new DAGCombine optimization for BUILD_VECTOR.

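If all of the inputs are zero_extend/any_extend nodes (or undef), create a new,
simpler BUILD_VECTOR of the narrower source element type and bitcast it back to
the original type, so that it can be further optimized by the other
BUILD_VECTOR optimizations.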



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143297 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2011-10-29 21:23:04 +00:00
parent f86545ecfd
commit b00418af67
4 changed files with 106 additions and 5 deletions

View File

@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// optimizations.
EVT SourceType = MVT::Other;
bool allExtend = true;
bool allAnyExt = true;
for (unsigned i = 0; i < NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
// Abort on incoming values that are not extends.
if (!ZeroExt && !AnyExt) {
allExtend = false;
break;
}
// The input is a ZeroExt or AnyExt. Check the original type.
EVT InTy = In.getOperand(0).getValueType();
// Check that all of the widened source types are the same.
if (SourceType == MVT::Other)
SourceType = InTy;
else if (InTy != SourceType) {
// Multiple incoming types. Abort.
allExtend = false;
break;
}
// Check if all of the extends are ANY_EXTENDs.
allAnyExt &= AnyExt;
}
// Only transform if we are post type-legalization,
// all of the values are extends or undef,
// and there is at least one non-undef entry.
if (LegalTypes && allExtend && SourceType != MVT::Other) {
bool isLE = TLI.isLittleEndian();
EVT InScalarTy = SourceType.getScalarType();
EVT OutScalarTy = N->getValueType(0).getScalarType();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy):
DAG.getConstant(0, InScalarTy);
unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
SmallVector<SDValue,8> Ops(NewBVElems , Filler);
// Populate the new build_vector
for (unsigned i=0; i < N->getNumOperands(); ++i) {
SDValue Cast = N->getOperand(i);
assert(Cast.getOpcode() == ISD::ANY_EXTEND ||
Cast.getOpcode() == ISD::ZERO_EXTEND ||
Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode");
SDValue In;
if (Cast.getOpcode() == ISD::UNDEF)
In = DAG.getUNDEF(InScalarTy);
else
In = Cast->getOperand(0);
unsigned Index = isLE ? (i * ElemRatio) :
(i * ElemRatio + (ElemRatio - 1));
assert(Index < Ops.size() && "Invalid index");
Ops[Index] = In;
}
// The type of the new BUILD_VECTOR node.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems);
assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
"Invalid vector size");
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
VecVT, &Ops[0], Ops.size());
// Bitcast to the desired type.
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 3
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 2
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:

View File

@ -0,0 +1,16 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
;CHECK: ltstore
;CHECK: pshufd
;CHECK: pshufd
;CHECK: ret
; Test function: loads a <4 x i32> vector, narrows it to its two lowest
; lanes with a shufflevector, and stores the resulting <2 x i32>.
; Both the load and store addresses are undef, so only the vector
; narrowing itself is being exercised.
define void @ltstore() {
entry:
; Load the full four-element source vector.
%in = load <4 x i32>* undef
; Keep only lanes 0 and 1 of %in; the second shuffle operand is unused (undef).
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; Store the two-element result.
store <2 x i32> %j, <2 x i32>* undef
ret void
}

View File

@ -26,10 +26,12 @@ entry:
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
entry:
; CHECK: movl 36({{%rdi|%rcx}})
; CHECK-NEXT: movl 48({{%rdi|%rcx}})
; CHECK: punpcklqdq
; CHECK: movq %xmm0, ({{%rsi|%rdx}})
; CHECK: t02
; CHECK: movaps
; CHECK: shufps
; CHECK: pshufd
; CHECK: movq
; CHECK: ret
%0 = bitcast <8 x i32>* %source to <4 x i32>*
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
%tmp2 = load <4 x i32>* %arrayidx, align 16