mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-10 20:33:15 +00:00
misched: better alias analysis.
This fixes a hole in the "cheap" alias analysis logic implemented within the DAG builder itself, regardless of whether proper alias analysis is enabled. It now handles this pattern produced by LSR+CodeGenPrepare:

  %sunkaddr1 = ptrtoint * %obj to i64
  %sunkaddr2 = add i64 %sunkaddr1, %lsr.iv
  %sunkaddr3 = inttoptr i64 %sunkaddr2 to i32*
  store i32 %v, i32* %sunkaddr3

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168768 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b422104f35
commit
8f82a08673
@ -67,7 +67,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
|
||||
// regular getUnderlyingObjectFromInt.
|
||||
if (U->getOpcode() == Instruction::PtrToInt)
|
||||
return U->getOperand(0);
|
||||
// If we find an add of a constant or a multiplied value, it's
|
||||
// If we find an add of a constant, a multiplied value, or a phi, it's
|
||||
// likely that the other operand will lead us to the base
|
||||
// object. We don't have to worry about the case where the
|
||||
// object address is somehow being computed by the multiply,
|
||||
@ -75,7 +75,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
|
||||
// identifiable object.
|
||||
if (U->getOpcode() != Instruction::Add ||
|
||||
(!isa<ConstantInt>(U->getOperand(1)) &&
|
||||
Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
|
||||
Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
|
||||
!isa<PHINode>(U->getOperand(1))))
|
||||
return V;
|
||||
V = U->getOperand(0);
|
||||
} else {
|
||||
|
127
test/CodeGen/X86/misched-matrix.ll
Normal file
127
test/CodeGen/X86/misched-matrix.ll
Normal file
@ -0,0 +1,127 @@
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
|
||||
; RUN: -misched-topdown -verify-machineinstrs \
|
||||
; RUN: | FileCheck %s -check-prefix=TOPDOWN
|
||||
;
|
||||
; Verify that the MI scheduler minimizes register pressure for a
|
||||
; uniform set of bottom-up subtrees (unrolled matrix multiply).
|
||||
;
|
||||
; For current top-down heuristics, ensure that some folded imulls have
|
||||
; been reordered with the stores. This tests the scheduler's cheap
|
||||
; alias analysis ability (that doesn't require any AliasAnalysis pass).
|
||||
;
|
||||
; TOPDOWN: %for.body
|
||||
; TOPDOWN: movl %{{.*}}, (
|
||||
; TOPDOWN: imull {{[0-9]*}}(
|
||||
; TOPDOWN: movl %{{.*}}, 4(
|
||||
; TOPDOWN: imull {{[0-9]*}}(
|
||||
; TOPDOWN: movl %{{.*}}, 8(
|
||||
; TOPDOWN: movl %{{.*}}, 12(
|
||||
; TOPDOWN: %for.end
|
||||
|
||||
define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
|
||||
[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
|
||||
%tmp = load i32* %arrayidx8, align 4, !tbaa !0
|
||||
%arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
|
||||
%tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
|
||||
%arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
|
||||
%tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
|
||||
%arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
|
||||
%tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
|
||||
%arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
|
||||
%tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
|
||||
%arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
|
||||
%tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
|
||||
%arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
|
||||
%tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
|
||||
%arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
|
||||
%tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
|
||||
%arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
|
||||
%tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
|
||||
%tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
|
||||
%arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
|
||||
%tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
|
||||
%tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
|
||||
%arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
|
||||
%tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
|
||||
%tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
|
||||
%arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
|
||||
%tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
|
||||
%tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
|
||||
%arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
|
||||
%tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
|
||||
%tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
|
||||
%arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
|
||||
%tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
|
||||
%tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
|
||||
%arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
|
||||
%tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
|
||||
%tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
|
||||
%arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
|
||||
%tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
|
||||
%tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
|
||||
%arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
|
||||
%tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
|
||||
%tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
|
||||
%arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
|
||||
%tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
|
||||
%tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
|
||||
%arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
|
||||
%tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
|
||||
%tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
|
||||
%arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
|
||||
%tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
|
||||
%tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
|
||||
%mul = mul nsw i32 %tmp1, %tmp
|
||||
%mul.1 = mul nsw i32 %tmp3, %tmp2
|
||||
%mul.2 = mul nsw i32 %tmp5, %tmp4
|
||||
%mul.3 = mul nsw i32 %tmp7, %tmp6
|
||||
%mul.138 = mul nsw i32 %tmp9, %tmp8
|
||||
%mul.1.1 = mul nsw i32 %tmp11, %tmp10
|
||||
%mul.2.1 = mul nsw i32 %tmp13, %tmp12
|
||||
%mul.3.1 = mul nsw i32 %tmp15, %tmp14
|
||||
%mul.240 = mul nsw i32 %tmp17, %tmp16
|
||||
%mul.1.2 = mul nsw i32 %tmp19, %tmp18
|
||||
%mul.2.2 = mul nsw i32 %tmp21, %tmp20
|
||||
%mul.3.2 = mul nsw i32 %tmp23, %tmp22
|
||||
%mul.342 = mul nsw i32 %tmp25, %tmp24
|
||||
%mul.1.3 = mul nsw i32 %tmp27, %tmp26
|
||||
%mul.2.3 = mul nsw i32 %tmp29, %tmp28
|
||||
%mul.3.3 = mul nsw i32 %tmp31, %tmp30
|
||||
%add.1 = add nsw i32 %mul.1, %mul
|
||||
%add.2 = add nsw i32 %mul.2, %add.1
|
||||
%add.3 = add nsw i32 %mul.3, %add.2
|
||||
%add.1.1 = add nsw i32 %mul.1.1, %mul.138
|
||||
%add.2.1 = add nsw i32 %mul.2.1, %add.1.1
|
||||
%add.3.1 = add nsw i32 %mul.3.1, %add.2.1
|
||||
%add.1.2 = add nsw i32 %mul.1.2, %mul.240
|
||||
%add.2.2 = add nsw i32 %mul.2.2, %add.1.2
|
||||
%add.3.2 = add nsw i32 %mul.3.2, %add.2.2
|
||||
%add.1.3 = add nsw i32 %mul.1.3, %mul.342
|
||||
%add.2.3 = add nsw i32 %mul.2.3, %add.1.3
|
||||
%add.3.3 = add nsw i32 %mul.3.3, %add.2.3
|
||||
%arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
|
||||
store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
|
||||
%arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
|
||||
store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
|
||||
%arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
|
||||
store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
|
||||
%arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
|
||||
store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 4
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = metadata !{metadata !"int", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
Loading…
x
Reference in New Issue
Block a user