InstCombine: extract instead of shuffle when performing vector/array type punning

Summary: SROA generates code that isn't quite as easy to optimize and contains unusual-sized shuffles, but that code is generally correct. As discussed in D7487 the right place to clean things up is InstCombine, which will pick up the type-punning pattern and transform it into a more obvious bitcast+extractelement, while leaving the other patterns SROA encounters as-is.

Test Plan: make check

Reviewers: jvoung, chandlerc

Subscribers: llvm-commits

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230560 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
JF Bastien
2015-02-25 22:30:51 +00:00
parent 463734499e
commit 6fec24744f
2 changed files with 253 additions and 5 deletions

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@ -853,10 +854,32 @@ static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
}
}
// Determines whether the shuffle picks one contiguous, ascending run of
// elements that lies entirely within its first (LHS) operand, for example:
//                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//   Input:       |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
//   Shuffles to:             |EE|FF|GG|HH|
//                            +--+--+--+--+
// Mask is the shuffle mask of SVI. Its first and last entries are read
// unconditionally, so it must not be empty. All entries are compared after
// conversion to unsigned.
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
                                       SmallVector<int, 16> &Mask) {
  const unsigned NumLHSElems =
      cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
  const unsigned NumMaskElems = Mask.size();
  const unsigned First = Mask.front();
  const unsigned Last = Mask.back();

  // Cheap rejections based only on the two end points: the run must be
  // ascending, must end inside the LHS operand, and must span exactly as many
  // indices as the mask has entries.
  if (First > Last)
    return false;
  if (Last >= NumLHSElems)
    return false;
  if (Last - First != NumMaskElems - 1)
    return false;

  // The end points line up; now confirm every entry in between is the next
  // consecutive index.
  unsigned Expected = First;
  for (int Elt : Mask) {
    if (static_cast<unsigned>(Elt) != Expected)
      return false;
    ++Expected;
  }
  return true;
}
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
bool MadeChange = false;
@ -892,18 +915,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
Elts.push_back(UndefValue::get(Int32Ty));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
Elts.push_back(UndefValue::get(Int32Ty));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
Mask[i]));
Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@ -929,6 +951,96 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return ReplaceInstUsesWith(SVI, V);
}
// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
// a non-vector type. We can instead bitcast the original vector followed by
// an extract of the desired element:
//
// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// %1 = bitcast <4 x i8> %sroa to i32
// Becomes:
// %bc = bitcast <16 x i8> %in to <4 x i32>
// %ext = extractelement <4 x i32> %bc, i32 0
//
// If the shuffle is extracting a contiguous range of values from the input
// vector then each use which is a bitcast of the extracted size can be
// replaced. This will work if the vector types are compatible, and the begin
// index is aligned to a value in the casted vector type. If the begin index
// isn't aligned then we can shuffle the original vector (keeping the same
// vector type) before extracting.
//
// This code will bail out if the target type is fundamentally incompatible
// with vectors of the source type.
//
// Example of <16 x i8>, target type i32:
// Index range [4,8): v-----------v Will work.
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
// <16 x i8>: | | | | | | | | | | | | | | | | |
// <4 x i32>: | | | | |
// +-----------+-----------+-----------+-----------+
// Index range [6,10): ^-----------^ Needs an extra shuffle.
// Target type i40: ^--------------^ Won't work, bail.
if (isShuffleExtractingFromLHS(SVI, Mask)) {
Value *V = LHS;
unsigned MaskElems = Mask.size();
unsigned BegIdx = Mask.front();
VectorType *SrcTy = cast<VectorType>(V->getType());
unsigned VecBitWidth = SrcTy->getBitWidth();
unsigned SrcElemBitWidth =
SrcTy->getElementType()->getPrimitiveSizeInBits();
assert(SrcElemBitWidth && "vector elements must have a bitwidth");
unsigned SrcNumElems = SrcTy->getNumElements();
SmallVector<BitCastInst *, 8> BCs;
DenseMap<Type *, Value *> NewBCs;
for (User *U : SVI.users())
if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
if (!BC->use_empty())
// Only visit bitcasts that weren't previously handled.
BCs.push_back(BC);
for (BitCastInst *BC : BCs) {
Type *TgtTy = BC->getDestTy();
unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits();
if (!TgtElemBitWidth)
continue;
unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
if (!VecBitWidthsEqual)
continue;
if (!VectorType::isValidElementType(TgtTy))
continue;
VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
if (!BegIsAligned) {
// Shuffle the input so [0,MaskElems) contains the output, and
// [MaskElems,SrcNumElems) is undef.
SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
UndefValue::get(Int32Ty));
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(ShuffleMask),
SVI.getName() + ".extract");
BegIdx = 0;
}
unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
assert(SrcElemsPerTgtElem);
BegIdx /= SrcElemsPerTgtElem;
bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
auto *NewBC =
BCAlreadyExists
? NewBCs[CastSrcTy]
: Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
if (!BCAlreadyExists)
NewBCs[CastSrcTy] = NewBC;
auto *Ext = Builder->CreateExtractElement(
NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
// The shufflevector isn't being replaced: the bitcast that used it
// is. InstCombine will visit the newly-created instructions.
ReplaceInstUsesWith(*BC, Ext);
MadeChange = true;
}
}
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
@ -1099,7 +1211,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));