llvm-6502/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
David Blaikie 7c9c6ed761 [opaque pointer type] Add textual IR support for explicit type parameter to load instruction
Essentially the same as the GEP change in r230786.

A similar migration script can be used to update test cases, though a few more
test case improvements/changes were required this time around: (r229269-r229278)

import fileinput
import sys
import re

# Recognises an old-style typed-pointer load: the word "load" (preceded by
# '=', ':' or start of line), optional "atomic"/"volatile" qualifiers, a
# lazily matched pointee type (group 2), an optional addrspace(N) qualifier
# (group 3), the '*' itself, and then either end of line or the start of a
# pointer operand (group 4).
pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

# Filter stdin to stdout: after the matched "load <ty>" text, insert ", "
# followed by the pointee type again, so "load T* %p" becomes
# "load T, T* %p". Lines that do not match pass through unchanged.
sys.stdout.writelines(pat.sub(r"\1, \2\3*\4", text) for text in sys.stdin)

Reviewers: rafael, dexonsmith, grosser

Differential Revision: http://reviews.llvm.org/D7649

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230794 91177308-0d34-0410-b5e6-96231b3b80d8
2015-02-27 21:17:42 +00:00

97 lines
3.6 KiB
LLVM

; REQUIRES: asserts
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
; @sharedidx is an unrolled variant of this loop:
; for (unsigned long i = 0; i < len; i += s) {
; c[i] = a[i] + b[i];
; }
; where 's' cannot be folded into the addressing mode.
;
; This is not quite profitable to chain. But with -stress-ivchain, we
; can form three address chains in place of the shared induction
; variable.
; X64: sharedidx:
; X64: %for.body.preheader
; X64-NOT: leal ({{.*}},4)
; X64: %for.body.1
; X32: sharedidx:
; X32: %for.body.2
; X32: add
; X32: add
; X32: add
; X32: add
; X32: add
; X32: %for.body.3
; Test input: four copies of the loop body (for.body, for.body.1, for.body.2,
; for.body.3). Each copy loads one byte from %a and one from %b at the same
; i32 index, adds them, and stores the truncated byte to %c at that index.
; The index is advanced by %s after every copy and compared (unsigned)
; against %len; the function returns as soon as the index is no longer
; below %len.
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
entry:
; Skip the loop entirely when %len is zero.
%cmp8 = icmp eq i32 %len, 0
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body.3
; %i.09 is the shared index: 0 on entry, otherwise the value produced at the
; end of for.body.3.
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
%0 = load i8, i8* %arrayidx, align 1
%conv6 = zext i8 %0 to i32
%arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
%1 = load i8, i8* %arrayidx1, align 1
%conv27 = zext i8 %1 to i32
; Sum the two zero-extended bytes in i32, then keep only the low 8 bits.
%add = add nsw i32 %conv27, %conv6
%conv3 = trunc i32 %add to i8
%arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
store i8 %conv3, i8* %arrayidx4, align 1
; Advance the index by the runtime stride %s and test it against %len.
%add5 = add i32 %i.09, %s
%cmp = icmp ult i32 %add5, %len
br i1 %cmp, label %for.body.1, label %for.end
for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
ret void
for.body.1: ; preds = %for.body
; Second copy of the body, indexed by %add5.
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
%2 = load i8, i8* %arrayidx.1, align 1
%conv6.1 = zext i8 %2 to i32
%arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
%3 = load i8, i8* %arrayidx1.1, align 1
%conv27.1 = zext i8 %3 to i32
%add.1 = add nsw i32 %conv27.1, %conv6.1
%conv3.1 = trunc i32 %add.1 to i8
%arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
store i8 %conv3.1, i8* %arrayidx4.1, align 1
%add5.1 = add i32 %add5, %s
%cmp.1 = icmp ult i32 %add5.1, %len
br i1 %cmp.1, label %for.body.2, label %for.end
for.body.2: ; preds = %for.body.1
; Third copy of the body, indexed by %add5.1.
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
%4 = load i8, i8* %arrayidx.2, align 1
%conv6.2 = zext i8 %4 to i32
%arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
%5 = load i8, i8* %arrayidx1.2, align 1
%conv27.2 = zext i8 %5 to i32
%add.2 = add nsw i32 %conv27.2, %conv6.2
%conv3.2 = trunc i32 %add.2 to i8
%arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
store i8 %conv3.2, i8* %arrayidx4.2, align 1
%add5.2 = add i32 %add5.1, %s
%cmp.2 = icmp ult i32 %add5.2, %len
br i1 %cmp.2, label %for.body.3, label %for.end
for.body.3: ; preds = %for.body.2
; Fourth copy of the body, indexed by %add5.2; its updated index %add5.3
; feeds the phi in for.body when the loop continues.
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
%6 = load i8, i8* %arrayidx.3, align 1
%conv6.3 = zext i8 %6 to i32
%arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
%7 = load i8, i8* %arrayidx1.3, align 1
%conv27.3 = zext i8 %7 to i32
%add.3 = add nsw i32 %conv27.3, %conv6.3
%conv3.3 = trunc i32 %add.3 to i8
%arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
store i8 %conv3.3, i8* %arrayidx4.3, align 1
%add5.3 = add i32 %add5.2, %s
%cmp.3 = icmp ult i32 %add5.3, %len
br i1 %cmp.3, label %for.body, label %for.end
}