Mirror of https://github.com/c64scene-ar/llvm-6502.git
Revert r203230, "CodeGenPrep: sink extends of illegal types into use block."
It choked i686 stage2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203386 91177308-0d34-0410-b5e6-96231b3b80d8
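For context: the reverted r203230 taught CodeGenPrepare to sink a zext or sext whose result type is illegal (i.e. must be expanded into multiple registers) out of its defining block and into its use blocks. A minimal LLVM IR sketch of that intent, adapted from the regression test deleted at the bottom of this commit (this shows the pre-revert behavior):

define i128 @sink(i64* %mem1, i64* %mem2) {
block1:
  %l1 = load i64* %mem1
  %s1 = sext i64 %l1 to i128   ; i128 is illegal on x86-64; r203230 sank
  br label %block2             ; this cast into block2, next to its use,
block2:                        ; so isel could form one i64*i64->i128 imulq
  %l2 = load i64* %mem2
  %s2 = sext i64 %l2 to i128
  %res = mul i128 %s1, %s2
  ret i128 %res
}

After this revert, the cast stays in block1 and CodeGenPrepare leaves such extends alone.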
@@ -129,7 +129,6 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
     bool OptimizeInlineAsmInst(CallInst *CS);
     bool OptimizeCallInst(CallInst *CI);
-    bool SinkExtExpand(CastInst *I);
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
     bool OptimizeSelectInst(SelectInst *SI);
@@ -465,8 +464,40 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
-/// SinkCast - Sink the specified cast instruction into its user blocks
-static bool SinkCast(CastInst *CI) {
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+  // If this is a noop copy,
+  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(CI->getType());
+
+  // This is an fp<->int conversion?
+  if (SrcVT.isInteger() != DstVT.isInteger())
+    return false;
+
+  // If this is an extension, it will be a zero or sign extension, which
+  // isn't a noop.
+  if (SrcVT.bitsLT(DstVT)) return false;
+
+  // If these values will be promoted, find out what they will be promoted
+  // to.  This helps us consider truncates on PPC as noop copies when they
+  // are.
+  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+      TargetLowering::TypePromoteInteger)
+    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+      TargetLowering::TypePromoteInteger)
+    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+  // If, after promotion, these are the same types, this is a noop copy.
+  if (SrcVT != DstVT)
+    return false;
+
   BasicBlock *DefBB = CI->getParent();
 
   /// InsertedCasts - Only insert a cast in each block once.
@@ -516,43 +547,6 @@ static bool SinkCast(CastInst *CI) {
   return MadeChange;
 }
 
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
-///
-/// Return true if any changes are made.
-///
-static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
-  // If this is a noop copy,
-  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
-  EVT DstVT = TLI.getValueType(CI->getType());
-
-  // This is an fp<->int conversion?
-  if (SrcVT.isInteger() != DstVT.isInteger())
-    return false;
-
-  // If this is an extension, it will be a zero or sign extension, which
-  // isn't a noop.
-  if (SrcVT.bitsLT(DstVT)) return false;
-
-  // If these values will be promoted, find out what they will be promoted
-  // to.  This helps us consider truncates on PPC as noop copies when they
-  // are.
-  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
-      TargetLowering::TypePromoteInteger)
-    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
-  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
-      TargetLowering::TypePromoteInteger)
-    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
-
-  // If, after promotion, these are the same types, this is a noop copy.
-  if (SrcVT != DstVT)
-    return false;
-
-  return SinkCast(CI);
-}
-
 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
 /// the number of virtual registers that must be created and coalesced. This is
 /// a clear win except on targets with multiple condition code registers
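The restored OptimizeNoopCopyExpression only sinks casts that type legalization will reduce to plain register copies. A hypothetical illustration of such a cast (this assumes a target, such as PPC, that promotes i8 to i32; the function and value names are illustrative, not from the LLVM test suite):

define i8 @noop_trunc(i32 %x, i1 %c) {
entry:
  %t = trunc i32 %x to i8            ; after i8 is promoted to i32,
  br i1 %c, label %use, label %done  ; SrcVT == DstVT: a noop copy
use:
  ret i8 %t                          ; re-creating the cast here avoids a
done:                                ; cross-block virtual register copy
  ret i8 0
}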
@@ -2523,16 +2517,6 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
   return MadeChange;
 }
 
-/// SinkExtExpand - Sink a zext or sext into its user blocks if the target type
-/// doesn't fit in one register
-bool CodeGenPrepare::SinkExtExpand(CastInst *CI) {
-  if (TLI &&
-      TLI->getTypeAction(CI->getContext(), TLI->getValueType(CI->getType())) ==
-          TargetLowering::TypeExpandInteger)
-    return SinkCast(CI);
-  return false;
-}
-
 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
 /// basic block as the load, unless conditions are unfavorable. This allows
 /// SelectionDAG to fold the extend into the load.
@@ -2546,12 +2530,6 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
   if (LI->getParent() == I->getParent())
     return false;
 
-  // Do not undo the optimization in SinkExtExpand
-  if (TLI &&
-      TLI->getTypeAction(I->getContext(), TLI->getValueType(I->getType())) ==
-          TargetLowering::TypeExpandInteger)
-    return false;
-
   // If the load has other users and the truncate is not free, this probably
   // isn't worthwhile.
   if (!LI->hasOneUse() &&
@@ -2833,8 +2811,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
       return true;
 
     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
-      if (SinkExtExpand(CI))
-        return true;
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
@@ -1444,6 +1444,54 @@ it would be nice to produce "into" someday.
 
 //===---------------------------------------------------------------------===//
 
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+int i;
+for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+    movl %ebx, %eax
+    imull (%edi,%ecx,4)
+    shrdl $31, %edx, %eax
+    addl %eax, (%esi,%ecx,4)
+    incl %ecx
+    cmpl $149, %ecx
+    jle .L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1: ## bb1
+    movl 24(%esp), %eax
+    movl (%eax,%edi,4), %ebx
+    movl %ebx, %ebp
+    imull %esi, %ebp
+    movl %ebx, %eax
+    mull %ecx
+    addl %ebp, %edx
+    sarl $31, %ebx
+    imull %ecx, %ebx
+    addl %edx, %ebx
+    shldl $1, %eax, %ebx
+    movl 20(%esp), %eax
+    addl %ebx, (%eax,%edi,4)
+    incl %edi
+    cmpl $150, %edi
+    jne LBB1_1 ## bb1
+
+The issue is that we hoist the cast of "scaler" to long long outside of the
+loop, the value comes into the loop as two values, and
+RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
+constructed BUILD_PAIR which represents the cast value.
+
+This can be handled by making CodeGenPrepare sink the cast.
+
+//===---------------------------------------------------------------------===//
 
 Test instructions can be eliminated by using EFLAGS values from arithmetic
 instructions. This is currently not done for mul, and, or, xor, neg, shl,
 sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
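In IR terms, the hoisted cast that the README entry above complains about looks roughly like the following. This is a hand-written sketch of the C code from the entry, not an actual llvm-gcc reduction; the names are illustrative:

define void @vec_mpys1(i32* %y, i32* %x, i32 %scaler) nounwind {
entry:
  %scaler.ext = sext i32 %scaler to i64   ; hoisted: on 32-bit x86 this i64
  br label %loop                          ; is two i32 halves across the loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %x.addr = getelementptr inbounds i32* %x, i32 %i
  %x.val = load i32* %x.addr
  %x.ext = sext i32 %x.val to i64
  %prod = mul nsw i64 %scaler.ext, %x.ext ; wants an i32*i32->i64 multiply
  %shifted = ashr i64 %prod, 31
  %t = trunc i64 %shifted to i32
  %y.addr = getelementptr inbounds i32* %y, i32 %i
  %y.val = load i32* %y.addr
  %sum = add i32 %y.val, %t
  store i32 %sum, i32* %y.addr
  %i.next = add i32 %i, 1
  %done = icmp eq i32 %i.next, 150
  br i1 %done, label %exit, label %loop
exit:
  ret void
}

Sinking %scaler.ext into the loop body would let the backend see the sext next to the multiply and select the single-instruction widening multiply, instead of reconstructing the value from a BUILD_PAIR.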
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-define void @test(i64* nocapture %arr, i64 %arrsize, i64 %factor) nounwind uwtable {
-  %1 = icmp sgt i64 %arrsize, 0
-  br i1 %1, label %.lr.ph, label %._crit_edge
-
-.lr.ph:                                           ; preds = %0
-  %2 = sext i64 %factor to i128
-  br label %3
-
-; <label>:3                                       ; preds = %3, %.lr.ph
-; CHECK-NOT: mul
-; CHECK: imulq
-; CHECK-NOT: mul
-  %carry.02 = phi i128 [ 0, %.lr.ph ], [ %10, %3 ]
-  %i.01 = phi i64 [ 0, %.lr.ph ], [ %11, %3 ]
-  %4 = getelementptr inbounds i64* %arr, i64 %i.01
-  %5 = load i64* %4, align 8
-  %6 = sext i64 %5 to i128
-  %7 = mul nsw i128 %6, %2
-  %8 = add nsw i128 %7, %carry.02
-  %.tr = trunc i128 %8 to i64
-  %9 = and i64 %.tr, 9223372036854775807
-  store i64 %9, i64* %4, align 8
-  %10 = ashr i128 %8, 63
-  %11 = add nsw i64 %i.01, 1
-  %exitcond = icmp eq i64 %11, %arrsize
-  br i1 %exitcond, label %._crit_edge, label %3
-
-._crit_edge:                                      ; preds = %3, %0
-  ret void
-}
@@ -1,46 +0,0 @@
-; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; The first cast should be sunk into block2, in order that the
-; instruction selector can form an efficient
-; i64 * i64 -> i128 multiplication.
-define i128 @sink(i64* %mem1, i64* %mem2) {
-; CHECK-LABEL: block1:
-; CHECK-NEXT: load
-block1:
-  %l1 = load i64* %mem1
-  %s1 = sext i64 %l1 to i128
-  br label %block2
-
-; CHECK-LABEL: block2:
-; CHECK-NEXT: sext
-; CHECK-NEXT: load
-; CHECK-NEXT: sext
-block2:
-  %l2 = load i64* %mem2
-  %s2 = sext i64 %l2 to i128
-  %res = mul i128 %s1, %s2
-  ret i128 %res
-}
-
-; The first cast should be hoisted into block1, in order that the
-; instruction selector can form an extend-load.
-define i64 @hoist(i32* %mem1, i32* %mem2) {
-; CHECK-LABEL: block1:
-; CHECK-NEXT: load
-; CHECK-NEXT: sext
-block1:
-  %l1 = load i32* %mem1
-  br label %block2
-
-; CHECK-LABEL: block2:
-; CHECK-NEXT: load
-; CHECK-NEXT: sext
-block2:
-  %s1 = sext i32 %l1 to i64
-  %l2 = load i32* %mem2
-  %s2 = sext i32 %l2 to i64
-  %res = mul i64 %s1, %s2
-  ret i64 %res
-}