mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-25 00:24:26 +00:00
InstCombine: extract instead of shuffle when performing vector/array type punning
Summary: SROA generates code that isn't quite as easy to optimize and contains unusual-sized shuffles, but that code is generally correct. As discussed in D7487 the right place to clean things up is InstCombine, which will pick up the type-punning pattern and transform it into a more obvious bitcast+extractelement, while leaving the other patterns SROA encounters as-is. Test Plan: make check Reviewers: jvoung, chandlerc Subscribers: llvm-commits git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230560 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -13,6 +13,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "InstCombineInternal.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
using namespace llvm;
|
||||
using namespace PatternMatch;
|
||||
@ -853,10 +854,32 @@ static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the shuffle is extracting a contiguous range of values from
|
||||
// LHS, for example:
|
||||
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
||||
// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
|
||||
// Shuffles to: |EE|FF|GG|HH|
|
||||
// +--+--+--+--+
|
||||
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
|
||||
SmallVector<int, 16> &Mask) {
|
||||
unsigned LHSElems =
|
||||
cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
|
||||
unsigned MaskElems = Mask.size();
|
||||
unsigned BegIdx = Mask.front();
|
||||
unsigned EndIdx = Mask.back();
|
||||
if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
|
||||
return false;
|
||||
for (unsigned I = 0; I != MaskElems; ++I)
|
||||
if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
Value *LHS = SVI.getOperand(0);
|
||||
Value *RHS = SVI.getOperand(1);
|
||||
SmallVector<int, 16> Mask = SVI.getShuffleMask();
|
||||
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
|
||||
|
||||
bool MadeChange = false;
|
||||
|
||||
@ -892,18 +915,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
SmallVector<Constant*, 16> Elts;
|
||||
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
|
||||
if (Mask[i] < 0) {
|
||||
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
|
||||
Elts.push_back(UndefValue::get(Int32Ty));
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
|
||||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
|
||||
Mask[i] = -1; // Turn into undef.
|
||||
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
|
||||
Elts.push_back(UndefValue::get(Int32Ty));
|
||||
} else {
|
||||
Mask[i] = Mask[i] % e; // Force to LHS.
|
||||
Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
|
||||
Mask[i]));
|
||||
Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
|
||||
}
|
||||
}
|
||||
SVI.setOperand(0, SVI.getOperand(1));
|
||||
@ -929,6 +951,96 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
return ReplaceInstUsesWith(SVI, V);
|
||||
}
|
||||
|
||||
// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
|
||||
// a non-vector type. We can instead bitcast the original vector followed by
|
||||
// an extract of the desired element:
|
||||
//
|
||||
// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
|
||||
// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
// %1 = bitcast <4 x i8> %sroa to i32
|
||||
// Becomes:
|
||||
// %bc = bitcast <16 x i8> %in to <4 x i32>
|
||||
// %ext = extractelement <4 x i32> %bc, i32 0
|
||||
//
|
||||
// If the shuffle is extracting a contiguous range of values from the input
|
||||
// vector then each use which is a bitcast of the extracted size can be
|
||||
// replaced. This will work if the vector types are compatible, and the begin
|
||||
// index is aligned to a value in the casted vector type. If the begin index
|
||||
// isn't aligned then we can shuffle the original vector (keeping the same
|
||||
// vector type) before extracting.
|
||||
//
|
||||
// This code will bail out if the target type is fundamentally incompatible
|
||||
// with vectors of the source type.
|
||||
//
|
||||
// Example of <16 x i8>, target type i32:
|
||||
// Index range [4,8): v-----------v Will work.
|
||||
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
||||
// <16 x i8>: | | | | | | | | | | | | | | | | |
|
||||
// <4 x i32>: | | | | |
|
||||
// +-----------+-----------+-----------+-----------+
|
||||
// Index range [6,10): ^-----------^ Needs an extra shuffle.
|
||||
// Target type i40: ^--------------^ Won't work, bail.
|
||||
if (isShuffleExtractingFromLHS(SVI, Mask)) {
|
||||
Value *V = LHS;
|
||||
unsigned MaskElems = Mask.size();
|
||||
unsigned BegIdx = Mask.front();
|
||||
VectorType *SrcTy = cast<VectorType>(V->getType());
|
||||
unsigned VecBitWidth = SrcTy->getBitWidth();
|
||||
unsigned SrcElemBitWidth =
|
||||
SrcTy->getElementType()->getPrimitiveSizeInBits();
|
||||
assert(SrcElemBitWidth && "vector elements must have a bitwidth");
|
||||
unsigned SrcNumElems = SrcTy->getNumElements();
|
||||
SmallVector<BitCastInst *, 8> BCs;
|
||||
DenseMap<Type *, Value *> NewBCs;
|
||||
for (User *U : SVI.users())
|
||||
if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
|
||||
if (!BC->use_empty())
|
||||
// Only visit bitcasts that weren't previously handled.
|
||||
BCs.push_back(BC);
|
||||
for (BitCastInst *BC : BCs) {
|
||||
Type *TgtTy = BC->getDestTy();
|
||||
unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits();
|
||||
if (!TgtElemBitWidth)
|
||||
continue;
|
||||
unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
|
||||
bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
|
||||
bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
|
||||
if (!VecBitWidthsEqual)
|
||||
continue;
|
||||
if (!VectorType::isValidElementType(TgtTy))
|
||||
continue;
|
||||
VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
|
||||
if (!BegIsAligned) {
|
||||
// Shuffle the input so [0,NumElements) contains the output, and
|
||||
// [NumElems,SrcNumElems) is undef.
|
||||
SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
|
||||
UndefValue::get(Int32Ty));
|
||||
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
|
||||
ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
|
||||
V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
|
||||
ConstantVector::get(ShuffleMask),
|
||||
SVI.getName() + ".extract");
|
||||
BegIdx = 0;
|
||||
}
|
||||
unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
|
||||
assert(SrcElemsPerTgtElem);
|
||||
BegIdx /= SrcElemsPerTgtElem;
|
||||
bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
|
||||
auto *NewBC =
|
||||
BCAlreadyExists
|
||||
? NewBCs[CastSrcTy]
|
||||
: Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
|
||||
if (!BCAlreadyExists)
|
||||
NewBCs[CastSrcTy] = NewBC;
|
||||
auto *Ext = Builder->CreateExtractElement(
|
||||
NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
|
||||
// The shufflevector isn't being replaced: the bitcast that used it
|
||||
// is. InstCombine will visit the newly-created instructions.
|
||||
ReplaceInstUsesWith(*BC, Ext);
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If the LHS is a shufflevector itself, see if we can combine it with this
|
||||
// one without producing an unusual shuffle.
|
||||
// Cases that might be simplified:
|
||||
@ -1099,7 +1211,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
// or is a splat, do the replacement.
|
||||
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
|
||||
SmallVector<Constant*, 16> Elts;
|
||||
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
|
||||
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
|
||||
if (newMask[i] < 0) {
|
||||
Elts.push_back(UndefValue::get(Int32Ty));
|
||||
|
Reference in New Issue
Block a user