mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-19 04:32:19 +00:00
BBVectorize: Use a more sophisticated check for input cost
The old checking code, which assumed that input shuffles and insert-elements could always be folded (and thus were free) is too simple. This can only happen in special circumstances. Using the simple check caused infinite recursion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167750 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6996fd0b54
commit
86c88c938a
@ -28,6 +28,7 @@
|
||||
#include "llvm/Type.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
@ -400,6 +401,7 @@ namespace {
|
||||
DEBUG(dbgs() << "BBV: fusing loop #" << n <<
|
||||
" for " << BB.getName() << " in " <<
|
||||
BB.getParent()->getName() << "...\n");
|
||||
assert(n < 10 && "hrmm, really?");
|
||||
if (vectorizePairs(BB))
|
||||
changed = true;
|
||||
else
|
||||
@ -1765,9 +1767,12 @@ namespace {
|
||||
bool NeedsExtraction = false;
|
||||
for (Value::use_iterator I = S->first->use_begin(),
|
||||
IE = S->first->use_end(); I != IE; ++I) {
|
||||
if (isa<ShuffleVectorInst>(*I) ||
|
||||
isa<InsertElementInst>(*I) ||
|
||||
isa<ExtractElementInst>(*I))
|
||||
if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
|
||||
// Shuffle can be folded if it has no other input
|
||||
if (isa<UndefValue>(SI->getOperand(1)))
|
||||
continue;
|
||||
}
|
||||
if (isa<ExtractElementInst>(*I))
|
||||
continue;
|
||||
if (PrunedTreeInstrs.count(*I))
|
||||
continue;
|
||||
@ -1792,9 +1797,12 @@ namespace {
|
||||
NeedsExtraction = false;
|
||||
for (Value::use_iterator I = S->second->use_begin(),
|
||||
IE = S->second->use_end(); I != IE; ++I) {
|
||||
if (isa<ShuffleVectorInst>(*I) ||
|
||||
isa<InsertElementInst>(*I) ||
|
||||
isa<ExtractElementInst>(*I))
|
||||
if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
|
||||
// Shuffle can be folded if it has no other input
|
||||
if (isa<UndefValue>(SI->getOperand(1)))
|
||||
continue;
|
||||
}
|
||||
if (isa<ExtractElementInst>(*I))
|
||||
continue;
|
||||
if (PrunedTreeInstrs.count(*I))
|
||||
continue;
|
||||
@ -1844,14 +1852,35 @@ namespace {
|
||||
|
||||
// Combining vector operations of the same type is also assumed
|
||||
// folded with other operations.
|
||||
if (Ty1 == Ty2 &&
|
||||
(isa<ShuffleVectorInst>(O1) ||
|
||||
isa<InsertElementInst>(O1) ||
|
||||
isa<InsertElementInst>(O1)) &&
|
||||
(isa<ShuffleVectorInst>(O2) ||
|
||||
isa<InsertElementInst>(O2) ||
|
||||
isa<InsertElementInst>(O2)))
|
||||
continue;
|
||||
if (Ty1 == Ty2) {
|
||||
// If both are insert elements, then both can be widened.
|
||||
if (isa<InsertElementInst>(O1) && isa<InsertElementInst>(O2))
|
||||
continue;
|
||||
// If both are extract elements, and both have the same input
|
||||
// type, then they can be replaced with a shuffle
|
||||
ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
|
||||
*EIO2 = dyn_cast<ExtractElementInst>(O2);
|
||||
if (EIO1 && EIO2 &&
|
||||
EIO1->getOperand(0)->getType() ==
|
||||
EIO2->getOperand(0)->getType())
|
||||
continue;
|
||||
// If both are a shuffle with equal operand types and only two
|
||||
// unqiue operands, then they can be replaced with a single
|
||||
// shuffle
|
||||
ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
|
||||
*SIO2 = dyn_cast<ShuffleVectorInst>(O2);
|
||||
if (SIO1 && SIO2 &&
|
||||
SIO1->getOperand(0)->getType() ==
|
||||
SIO2->getOperand(0)->getType()) {
|
||||
SmallSet<Value *, 4> SIOps;
|
||||
SIOps.insert(SIO1->getOperand(0));
|
||||
SIOps.insert(SIO1->getOperand(1));
|
||||
SIOps.insert(SIO2->getOperand(0));
|
||||
SIOps.insert(SIO2->getOperand(1));
|
||||
if (SIOps.size() <= 2)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
int ESContrib;
|
||||
// This pair has already been formed.
|
||||
|
54
test/Transforms/BBVectorize/X86/sh-rec.ll
Normal file
54
test/Transforms/BBVectorize/X86/sh-rec.ll
Normal file
@ -0,0 +1,54 @@
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
|
||||
|
||||
define void @ptoa() nounwind uwtable {
|
||||
entry:
|
||||
%call = call i8* @malloc() nounwind
|
||||
br i1 undef, label %return, label %if.end10
|
||||
|
||||
if.end10: ; preds = %entry
|
||||
%incdec.ptr = getelementptr inbounds i8* %call, i64 undef
|
||||
%call17 = call i32 @ptou() nounwind
|
||||
%incdec.ptr26.1 = getelementptr inbounds i8* %incdec.ptr, i64 -2
|
||||
store i8 undef, i8* %incdec.ptr26.1, align 1
|
||||
%div27.1 = udiv i32 %call17, 100
|
||||
%rem.2 = urem i32 %div27.1, 10
|
||||
%add2230.2 = or i32 %rem.2, 48
|
||||
%conv25.2 = trunc i32 %add2230.2 to i8
|
||||
%incdec.ptr26.2 = getelementptr inbounds i8* %incdec.ptr, i64 -3
|
||||
store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
|
||||
%incdec.ptr26.3 = getelementptr inbounds i8* %incdec.ptr, i64 -4
|
||||
store i8 undef, i8* %incdec.ptr26.3, align 1
|
||||
%div27.3 = udiv i32 %call17, 10000
|
||||
%rem.4 = urem i32 %div27.3, 10
|
||||
%add2230.4 = or i32 %rem.4, 48
|
||||
%conv25.4 = trunc i32 %add2230.4 to i8
|
||||
%incdec.ptr26.4 = getelementptr inbounds i8* %incdec.ptr, i64 -5
|
||||
store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
|
||||
%div27.4 = udiv i32 %call17, 100000
|
||||
%rem.5 = urem i32 %div27.4, 10
|
||||
%add2230.5 = or i32 %rem.5, 48
|
||||
%conv25.5 = trunc i32 %add2230.5 to i8
|
||||
%incdec.ptr26.5 = getelementptr inbounds i8* %incdec.ptr, i64 -6
|
||||
store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
|
||||
%incdec.ptr26.6 = getelementptr inbounds i8* %incdec.ptr, i64 -7
|
||||
store i8 0, i8* %incdec.ptr26.6, align 1
|
||||
%incdec.ptr26.7 = getelementptr inbounds i8* %incdec.ptr, i64 -8
|
||||
store i8 undef, i8* %incdec.ptr26.7, align 1
|
||||
%div27.7 = udiv i32 %call17, 100000000
|
||||
%rem.8 = urem i32 %div27.7, 10
|
||||
%add2230.8 = or i32 %rem.8, 48
|
||||
%conv25.8 = trunc i32 %add2230.8 to i8
|
||||
%incdec.ptr26.8 = getelementptr inbounds i8* %incdec.ptr, i64 -9
|
||||
store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
|
||||
unreachable
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
; CHECK: @ptoa
|
||||
}
|
||||
|
||||
declare noalias i8* @malloc() nounwind
|
||||
|
||||
declare i32 @ptou()
|
Loading…
x
Reference in New Issue
Block a user