LSR: rewrite inner loops only.

Rewriting the entire loop nest now requires -enable-lsr-nested. See PR11035 for some performance data. A few unit tests specifically test nested LSR, and are now under a flag. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140762 91177308-0d34-0410-b5e6-96231b3b80d8
2025-11-01 15:17:25 +00:00 · 2011-09-29 01:33:38 +00:00
parent 03b08764d2
commit 0c01bc385a
7 changed files with 45 additions and 14 deletions
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8  -enable-lsr-nested < %s | FileCheck %s

 ; LSR should recognize that this is an unrolled loop which can use
 ; constant offset addressing, so that each of the following stores
@@ -8,6 +8,9 @@
 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]

+; We can also save a register in the outer loop, but that requires
+; performing LSR on the outer loop.
+
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"

 %0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }