llvm-6502/test/CodeGen/PowerPC/ctrloops.ll

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-freebsd10.0"
; RUN: llc < %s -march=ppc64 -relocation-model=pic | FileCheck %s

@a = common global i32 0, align 4

define void @test1(i32 %c) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %0 = load volatile i32, i32* @a, align 4
  %add = add nsw i32 %0, %c
  store volatile i32 %add, i32* @a, align 4
  %inc = add nsw i32 %i.01, 1
  %exitcond = icmp eq i32 %inc, 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test1
; CHECK-NOT: or 3, 3, 3
; CHECK: mtctr
; CHECK-NOT: addi {[0-9]+}
; CHECK-NOT: cmplwi
; CHECK: bdnz
}

define void @test2(i32 %c, i32 %d) nounwind {
entry:
  %cmp1 = icmp sgt i32 %d, 0
  br i1 %cmp1, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %0 = load volatile i32, i32* @a, align 4
  %add = add nsw i32 %0, %c
  store volatile i32 %add, i32* @a, align 4
  %inc = add nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %inc, %d
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK: @test2
; CHECK: mtctr
; CHECK-NOT: addi {[0-9]+}
; CHECK-NOT: cmplwi
; CHECK: bdnz
}

define void @test3(i32 %c, i32 %d) nounwind {
entry:
  %cmp1 = icmp sgt i32 %d, 0
  br i1 %cmp1, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nsw i32 %i.02, %c
  %0 = load volatile i32, i32* @a, align 4
  %add = add nsw i32 %0, %mul
  store volatile i32 %add, i32* @a, align 4
  %inc = add nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %inc, %d
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK: @test3
; CHECK: mtctr
; CHECK-NOT: addi {[0-9]+}
; CHECK-NOT: cmplwi
; CHECK: bdnz
}

@tls_var = external thread_local global i8

define i32 @test4() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %phi = phi i32 [ %dec, %for.body ], [ undef, %entry ]
  %load = ptrtoint i8* @tls_var to i32
  %dec = add i32 %phi, -1
  %cmp = icmp sgt i32 %phi, 1
  br i1 %cmp, label %for.body, label %return

return:                                           ; preds = %for.body
  ret i32 %load
; CHECK-LABEL: @test4
; CHECK-NOT: mtctr
; CHECK: addi {{[0-9]+}}
; CHECK: cmpwi
; CHECK-NOT: bdnz
; CHECK: bgt
}
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"`
			`target triple = "powerpc64-unknown-freebsd10.0"`
PowerPC: CTR shouldn't fire if a TLS call is in the loop Determining the address of a TLS variable results in a function call in certain TLS models. This means that a simple ICmpInst might actually result in invalidating the CTR register. In such cases, do not attempt to rely on the CTR register for loop optimization purposes. This fixes PR22034. Differential Revision: http://reviews.llvm.org/D6786 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224890 91177308-0d34-0410-b5e6-96231b3b80d8 2014-12-27 19:45:38 +00:00			`; RUN: llc < %s -march=ppc64 -relocation-model=pic \| FileCheck %s`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00
			`@a = common global i32 0, align 4`

			`define void @test1(i32 %c) nounwind {`
			`entry:`
			`br label %for.body`

			`for.body: ; preds = %for.body, %entry`
			`%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230794 91177308-0d34-0410-b5e6-96231b3b80d8 2015-02-27 21:17:42 +00:00			`%0 = load volatile i32, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%add = add nsw i32 %0, %c`
TBAA: remove !tbaa from testing cases if not used. This will make it easier to turn on struct-path aware TBAA since the metadata format will change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180796 91177308-0d34-0410-b5e6-96231b3b80d8 2013-04-30 17:52:57 +00:00			`store volatile i32 %add, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%inc = add nsw i32 %i.01, 1`
			`%exitcond = icmp eq i32 %inc, 2048`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body`
			`ret void`
			`; CHECK: @test1`
Improve ext/trunc patterns on PPC64. The PPC64 backend had patterns for i32 <-> i64 extensions and truncations that would leave self-moves in the final assembly. Replacing those patterns with ones based on the SUBREG builtins yields better-looking code. Thanks to Jakob and Owen for their suggestions in this matter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158283 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-09 22:10:19 +00:00			`; CHECK-NOT: or 3, 3, 3`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`; CHECK: mtctr`
Implement PPC counter loops as a late IR-level pass The old PPCCTRLoops pass, like the Hexagon pass version from which it was derived, could only handle some simple loops in canonical form. We cannot directly adapt the new Hexagon hardware loops pass, however, because the Hexagon pass contains a fundamental assumption that non-constant-trip-count loops will contain a guard, and this is not always true (the result being that incorrect negative counts can be generated). With this commit, we replace the pass with a late IR-level pass which makes use of SE to calculate the backedge-taken counts and safely generate the loop-count expressions (including any necessary max() parts). This IR level pass inserts custom intrinsics that are lowered into the desired decrement-and-branch instructions. The most fragile part of this new implementation is that interfering uses of the counter register must be detected on the IR level (and, on PPC, this also includes any indirect branches in addition to function calls). Also, to make all of this work, we need a variant of the mtctr instruction that is marked as having side effects. Without this, machine-code level CSE, DCE, etc. illegally transform the resulting code. Hopefully, this can be improved in the future. This new pass is smaller than the original (and much smaller than the new Hexagon hardware loops pass), and can handle many additional cases correctly. In addition, the preheader-creation code has been copied from LoopSimplify, and after we decide on where it belongs, this code will be refactored so that it can be explicitly shared (making this implementation even smaller). The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for the new Hexagon pass. There are a few classes of loops that this pass does not transform (noted by FIXMEs in the files), but these deficiencies can be addressed within the SE infrastructure (thus helping many other passes as well). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8 2013-05-15 21:37:41 +00:00			`; CHECK-NOT: addi {[0-9]+}`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`; CHECK-NOT: cmplwi`
			`; CHECK: bdnz`
			`}`

			`define void @test2(i32 %c, i32 %d) nounwind {`
			`entry:`
			`%cmp1 = icmp sgt i32 %d, 0`
			`br i1 %cmp1, label %for.body, label %for.end`

			`for.body: ; preds = %entry, %for.body`
			`%i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230794 91177308-0d34-0410-b5e6-96231b3b80d8 2015-02-27 21:17:42 +00:00			`%0 = load volatile i32, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%add = add nsw i32 %0, %c`
TBAA: remove !tbaa from testing cases if not used. This will make it easier to turn on struct-path aware TBAA since the metadata format will change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180796 91177308-0d34-0410-b5e6-96231b3b80d8 2013-04-30 17:52:57 +00:00			`store volatile i32 %add, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%inc = add nsw i32 %i.02, 1`
			`%exitcond = icmp eq i32 %inc, %d`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test2`
			`; CHECK: mtctr`
Implement PPC counter loops as a late IR-level pass The old PPCCTRLoops pass, like the Hexagon pass version from which it was derived, could only handle some simple loops in canonical form. We cannot directly adapt the new Hexagon hardware loops pass, however, because the Hexagon pass contains a fundamental assumption that non-constant-trip-count loops will contain a guard, and this is not always true (the result being that incorrect negative counts can be generated). With this commit, we replace the pass with a late IR-level pass which makes use of SE to calculate the backedge-taken counts and safely generate the loop-count expressions (including any necessary max() parts). This IR level pass inserts custom intrinsics that are lowered into the desired decrement-and-branch instructions. The most fragile part of this new implementation is that interfering uses of the counter register must be detected on the IR level (and, on PPC, this also includes any indirect branches in addition to function calls). Also, to make all of this work, we need a variant of the mtctr instruction that is marked as having side effects. Without this, machine-code level CSE, DCE, etc. illegally transform the resulting code. Hopefully, this can be improved in the future. This new pass is smaller than the original (and much smaller than the new Hexagon hardware loops pass), and can handle many additional cases correctly. In addition, the preheader-creation code has been copied from LoopSimplify, and after we decide on where it belongs, this code will be refactored so that it can be explicitly shared (making this implementation even smaller). The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for the new Hexagon pass. There are a few classes of loops that this pass does not transform (noted by FIXMEs in the files), but these deficiencies can be addressed within the SE infrastructure (thus helping many other passes as well). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8 2013-05-15 21:37:41 +00:00			`; CHECK-NOT: addi {[0-9]+}`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`; CHECK-NOT: cmplwi`
			`; CHECK: bdnz`
			`}`

			`define void @test3(i32 %c, i32 %d) nounwind {`
			`entry:`
			`%cmp1 = icmp sgt i32 %d, 0`
			`br i1 %cmp1, label %for.body, label %for.end`

			`for.body: ; preds = %entry, %for.body`
			`%i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]`
			`%mul = mul nsw i32 %i.02, %c`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230794 91177308-0d34-0410-b5e6-96231b3b80d8 2015-02-27 21:17:42 +00:00			`%0 = load volatile i32, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%add = add nsw i32 %0, %mul`
TBAA: remove !tbaa from testing cases if not used. This will make it easier to turn on struct-path aware TBAA since the metadata format will change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180796 91177308-0d34-0410-b5e6-96231b3b80d8 2013-04-30 17:52:57 +00:00			`store volatile i32 %add, i32* @a, align 4`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`%inc = add nsw i32 %i.02, 1`
			`%exitcond = icmp eq i32 %inc, %d`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test3`
			`; CHECK: mtctr`
Implement PPC counter loops as a late IR-level pass The old PPCCTRLoops pass, like the Hexagon pass version from which it was derived, could only handle some simple loops in canonical form. We cannot directly adapt the new Hexagon hardware loops pass, however, because the Hexagon pass contains a fundamental assumption that non-constant-trip-count loops will contain a guard, and this is not always true (the result being that incorrect negative counts can be generated). With this commit, we replace the pass with a late IR-level pass which makes use of SE to calculate the backedge-taken counts and safely generate the loop-count expressions (including any necessary max() parts). This IR level pass inserts custom intrinsics that are lowered into the desired decrement-and-branch instructions. The most fragile part of this new implementation is that interfering uses of the counter register must be detected on the IR level (and, on PPC, this also includes any indirect branches in addition to function calls). Also, to make all of this work, we need a variant of the mtctr instruction that is marked as having side effects. Without this, machine-code level CSE, DCE, etc. illegally transform the resulting code. Hopefully, this can be improved in the future. This new pass is smaller than the original (and much smaller than the new Hexagon hardware loops pass), and can handle many additional cases correctly. In addition, the preheader-creation code has been copied from LoopSimplify, and after we decide on where it belongs, this code will be refactored so that it can be explicitly shared (making this implementation even smaller). The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for the new Hexagon pass. There are a few classes of loops that this pass does not transform (noted by FIXMEs in the files), but these deficiencies can be addressed within the SE infrastructure (thus helping many other passes as well). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8 2013-05-15 21:37:41 +00:00			`; CHECK-NOT: addi {[0-9]+}`
Add the PPCCTRLoops pass: a PPC machine-code-level optimization pass to form CTR-based loop branching code. This pass is derived from the Hexagon HardwareLoops pass. The only significant enhancement over the Hexagon pass is that PPCCTRLoops will also attempt to delete the replaced add and compare operations if they are no longer otherwise used. Also, invalid preheader DebugLoc is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158204 91177308-0d34-0410-b5e6-96231b3b80d8 2012-06-08 15:38:21 +00:00			`; CHECK-NOT: cmplwi`
			`; CHECK: bdnz`
			`}`
PowerPC: CTR shouldn't fire if a TLS call is in the loop Determining the address of a TLS variable results in a function call in certain TLS models. This means that a simple ICmpInst might actually result in invalidating the CTR register. In such cases, do not attempt to rely on the CTR register for loop optimization purposes. This fixes PR22034. Differential Revision: http://reviews.llvm.org/D6786 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224890 91177308-0d34-0410-b5e6-96231b3b80d8 2014-12-27 19:45:38 +00:00
			`@tls_var = external thread_local global i8`

			`define i32 @test4() {`
			`entry:`
			`br label %for.body`

			`for.body: ; preds = %for.body, %entry`
			`%phi = phi i32 [ %dec, %for.body ], [ undef, %entry ]`
			`%load = ptrtoint i8* @tls_var to i32`
			`%dec = add i32 %phi, -1`
			`%cmp = icmp sgt i32 %phi, 1`
			`br i1 %cmp, label %for.body, label %return`

			`return: ; preds = %for.body`
			`ret i32 %load`
			`; CHECK-LABEL: @test4`
			`; CHECK-NOT: mtctr`
			`; CHECK: addi {{[0-9]+}}`
			`; CHECK: cmpwi`
			`; CHECK-NOT: bdnz`
			`; CHECK: bgt`
			`}`