llvm-6502/test/CodeGen/ARM/lsr-unfolded-offset.ll

; RUN: llc -regalloc=greedy < %s | FileCheck %s

; LSR (loop strength reduction) shouldn't introduce more induction variables
; than needed, because doing so increases register pressure and therefore
; spilling. There is more room for improvement here.

; CHECK: sub sp, #{{40|32|28|24}}

; CHECK: %for.inc
; CHECK-NOT: ldr
; CHECK: add

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-ios"

%struct.partition_entry = type { i32, i32, i64, i64 }

; Test input: a doubly nested loop over an array of %struct.partition_entry.
; For every pair of distinct indices (%0 outer, %1 inner) it compares the two
; i64 fields (struct indices 2 and 3, named "offset" and "len" below) of both
; entries and records a 0/1 flag in the %overlap.* values.
;
;   %part        - pointer to the first %struct.partition_entry
;   %num_entries - trip count of both loops (signed i32)
;   returns      - 0 when %num_entries <= 0, otherwise the final %overlap.4
;
; NOTE(review): the instruction order and block names are what the FileCheck
; directives at the top of the file depend on (for.inc in particular); keep
; them unchanged when editing.
define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp {
entry:
  ; Skip both loops entirely when there are no entries.
  %cmp79 = icmp sgt i32 %num_entries, 0
  br i1 %cmp79, label %outer.loop, label %for.end72

outer.loop:                                 ; preds = %for.inc69, %entry
  ; %overlap.081 carries the flag across outer iterations; %0 is the outer index.
  %overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]
  %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
  ; Addresses of fields 2 and 3 of part[%0]; both are loaded once per outer
  ; iteration and stay live across the whole inner loop.
  %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
  %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
  %tmp5 = load i64* %offset, align 4
  %tmp15 = load i64* %len, align 4
  ; %add = offset + len of the outer entry.
  %add = add nsw i64 %tmp15, %tmp5
  br label %inner.loop

inner.loop:                                       ; preds = %for.inc, %outer.loop
  ; %overlap.178 is the flag on entry to this inner iteration; %1 is the inner index.
  %overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ]
  %1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ]
  ; An entry is never compared against itself.
  %cmp23 = icmp eq i32 %0, %1
  br i1 %cmp23, label %for.inc, label %if.end

if.end:                                           ; preds = %inner.loop
  ; Fields 2 and 3 of part[%1], addressed off the inner induction variable.
  %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
  %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
  %tmp29 = load i64* %offset28, align 4
  %tmp40 = load i64* %len39, align 4
  ; %add41 = offset + len of the inner entry.
  %add41 = add nsw i64 %tmp40, %tmp29
  ; Flag becomes 1 when %tmp5 <= %tmp29 < %add.
  %cmp44 = icmp sge i64 %tmp29, %tmp5
  %cmp47 = icmp slt i64 %tmp29, %add
  %or.cond = and i1 %cmp44, %cmp47
  %overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178
  ; Flag becomes 1 when %tmp5 < %add41 <= %add.
  %cmp52 = icmp sle i64 %add41, %add
  %cmp56 = icmp sgt i64 %add41, %tmp5
  %or.cond74 = and i1 %cmp52, %cmp56
  %overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2
  ; If neither %tmp29 > %tmp5 nor %add41 < %add holds, fall into if.then66,
  ; which forces the flag to 1 via the phi in for.inc.
  %cmp61 = icmp sgt i64 %tmp29, %tmp5
  %cmp65 = icmp slt i64 %add41, %add
  %or.cond75 = or i1 %cmp61, %cmp65
  br i1 %or.cond75, label %for.inc, label %if.then66

if.then66:                                        ; preds = %if.end
  ; Empty block: exists only to give for.inc a distinct predecessor for the constant 1.
  br label %for.inc

for.inc:                                          ; preds = %if.end, %if.then66, %inner.loop
  ; Inner-loop latch: merge the flag from the three incoming paths, then
  ; advance %1 and exit once it reaches %num_entries.
  %overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ]
  %inc = add nsw i32 %1, 1
  %exitcond = icmp eq i32 %inc, %num_entries
  br i1 %exitcond, label %for.inc69, label %inner.loop

for.inc69:                                        ; preds = %for.inc
  ; Outer-loop latch: advance %0 and exit once it reaches %num_entries.
  %inc71 = add nsw i32 %0, 1
  %exitcond83 = icmp eq i32 %inc71, %num_entries
  br i1 %exitcond83, label %for.end72, label %outer.loop

for.end72:                                        ; preds = %for.inc69, %entry
  ; Result is 0 on the early exit from entry, otherwise the last merged flag.
  %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
  ret i32 %overlap.0.lcssa
}
Give this test an explicit register allocator, so that it can work even if the default register allocator is changed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130883 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-04 23:14:02 +00:00			`; RUN: llc -regalloc=greedy < %s \| FileCheck %s`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00
			`; LSR shouldn't introduce more induction variables than needed, increasing`
			`; register pressure and therefore spilling. There is more room for improvement`
			`; here.`

Add dominance check for the instruction being hoisted. For example, MachineLICM should not hoist a load that is not guaranteed to be executed. Radar 10254254. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141689 91177308-0d34-0410-b5e6-96231b3b80d8 2011-10-11 18:09:58 +00:00			`; CHECK: sub sp, #{{40\|32\|28\|24}}`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00
Be more aggressive about following hints. RAGreedy::tryAssign will now evict interference from the preferred register even when another register is free. To support this, add the EvictionCost struct that counts how many hints are broken by an eviction. We don't want to break one hint just to satisfy another. Rename canEvict to shouldEvict, and add the first bit of eviction policy that doesn't depend on spill weights: Always make room in the preferred register as long as the evictees can be split and aren't already assigned to their preferred register. Also make the CSR avoidance more accurate. When looking for a cheaper register it is OK to use a new volatile register. Only CSR aliases that have never been used before should be avoided. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@134735 91177308-0d34-0410-b5e6-96231b3b80d8 2011-07-08 20:46:18 +00:00			`; CHECK: %for.inc`
Switch spill weights from a basic loop depth estimation to BlockFrequencyInfo. The main advantages here are way better heuristics, taking into account not just loop depth but also __builtin_expect and other static heuristics and will eventually learn how to use profile info. Most of the work in this patch is pushing the MachineBlockFrequencyInfo analysis into the right places. This is good for a 5% speedup on zlib's deflate (x86_64), there were some very unfortunate spilling decisions in its hottest loop in longest_match(). Other benchmarks I tried were mostly neutral. This changes register allocation in subtle ways, update the tests for it. 2012-02-20-MachineCPBug.ll was deleted as it's very fragile and the instruction it looked for was gone already (but the FileCheck pattern picked up unrelated stuff). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184105 91177308-0d34-0410-b5e6-96231b3b80d8 2013-06-17 19:00:36 +00:00			`; CHECK-NOT: ldr`
Be more aggressive about following hints. RAGreedy::tryAssign will now evict interference from the preferred register even when another register is free. To support this, add the EvictionCost struct that counts how many hints are broken by an eviction. We don't want to break one hint just to satisfy another. Rename canEvict to shouldEvict, and add the first bit of eviction policy that doesn't depend on spill weights: Always make room in the preferred register as long as the evictees can be split and aren't already assigned to their preferred register. Also make the CSR avoidance more accurate. When looking for a cheaper register it is OK to use a new volatile register. Only CSR aliases that have never been used before should be avoided. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@134735 91177308-0d34-0410-b5e6-96231b3b80d8 2011-07-08 20:46:18 +00:00			`; CHECK: add`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00
			`target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"`
Fix more places which should be checking for iOS, not darwin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147513 91177308-0d34-0410-b5e6-96231b3b80d8 2012-01-04 01:55:04 +00:00			`target triple = "thumbv7-apple-ios"`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00
			`%struct.partition_entry = type { i32, i32, i64, i64 }`

			`define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp {`
			`entry:`
			`%cmp79 = icmp sgt i32 %num_entries, 0`
			`br i1 %cmp79, label %outer.loop, label %for.end72`

			`outer.loop: ; preds = %for.inc69, %entry`
			`%overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]`
			`%0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]`
			`%offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2`
			`%len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3`
TBAA: remove !tbaa from testing cases if not used. This will make it easier to turn on struct-path aware TBAA since the metadata format will change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180796 91177308-0d34-0410-b5e6-96231b3b80d8 2013-04-30 17:52:57 +00:00			`%tmp5 = load i64* %offset, align 4`
			`%tmp15 = load i64* %len, align 4`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00			`%add = add nsw i64 %tmp15, %tmp5`
			`br label %inner.loop`

			`inner.loop: ; preds = %for.inc, %outer.loop`
			`%overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ]`
			`%1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ]`
			`%cmp23 = icmp eq i32 %0, %1`
			`br i1 %cmp23, label %for.inc, label %if.end`

			`if.end: ; preds = %inner.loop`
			`%len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3`
			`%offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2`
TBAA: remove !tbaa from testing cases if not used. This will make it easier to turn on struct-path aware TBAA since the metadata format will change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180796 91177308-0d34-0410-b5e6-96231b3b80d8 2013-04-30 17:52:57 +00:00			`%tmp29 = load i64* %offset28, align 4`
			`%tmp40 = load i64* %len39, align 4`
Add an unfolded offset field to LSR's Formula record. This is used to model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130743 91177308-0d34-0410-b5e6-96231b3b80d8 2011-05-03 00:46:49 +00:00			`%add41 = add nsw i64 %tmp40, %tmp29`
			`%cmp44 = icmp sge i64 %tmp29, %tmp5`
			`%cmp47 = icmp slt i64 %tmp29, %add`
			`%or.cond = and i1 %cmp44, %cmp47`
			`%overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178`
			`%cmp52 = icmp sle i64 %add41, %add`
			`%cmp56 = icmp sgt i64 %add41, %tmp5`
			`%or.cond74 = and i1 %cmp52, %cmp56`
			`%overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2`
			`%cmp61 = icmp sgt i64 %tmp29, %tmp5`
			`%cmp65 = icmp slt i64 %add41, %add`
			`%or.cond75 = or i1 %cmp61, %cmp65`
			`br i1 %or.cond75, label %for.inc, label %if.then66`

			`if.then66: ; preds = %if.end`
			`br label %for.inc`

			`for.inc: ; preds = %if.end, %if.then66, %inner.loop`
			`%overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ]`
			`%inc = add nsw i32 %1, 1`
			`%exitcond = icmp eq i32 %inc, %num_entries`
			`br i1 %exitcond, label %for.inc69, label %inner.loop`

			`for.inc69: ; preds = %for.inc`
			`%inc71 = add nsw i32 %0, 1`
			`%exitcond83 = icmp eq i32 %inc71, %num_entries`
			`br i1 %exitcond83, label %for.end72, label %outer.loop`

			`for.end72: ; preds = %for.inc69, %entry`
			`%overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]`
			`ret i32 %overlap.0.lcssa`
			`}`