mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
This commit allows vectorized loops to be unrolled by a factor of 2 on AArch64.
Test cases are added for both AArch64 and ARM64. Patch by Z. Zheng.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206563 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1b16515971
commit
a1da819896
@ -92,6 +92,7 @@ public:
|
||||
return 64;
|
||||
}
|
||||
|
||||
/// Reports a maximum unroll factor of 2 for this target. Per the commit
/// message, this is what allows vectorized loops to be unrolled by 2.
unsigned getMaximumUnrollFactor() const override { return 2; }
|
||||
/// @}
|
||||
};
|
||||
|
||||
|
42
test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
Normal file
42
test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; Loop-vectorizer test for the aarch64-none-linux-gnu triple with +neon.
; The expected output contains the vector body twice (see the directives
; inside @array_add), consistent with the unroll factor of 2 enabled by the
; accompanying source change.
; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; array_add: scalar input loop for the vectorizer. When %size > 0 it stores
; a[i] + b[i] into c[i] (i32 elements) for i = 0 .. %size-1; it always
; returns %c.
; The FileCheck directives below require, in order, four <4 x i32> loads, two
; <4 x i32> "add nsw" instructions and two <4 x i32> stores before the ret,
; i.e. two vector copies of the loop body (two input arrays, one output).
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
|
||||
;CHECK-LABEL: array_add
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: add nsw <4 x i32>
|
||||
;CHECK: add nsw <4 x i32>
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: ret
|
||||
entry:
|
||||
; Signed guard: the loop is skipped entirely unless %size > 0.
%cmp10 = icmp sgt i32 %size, 0
|
||||
br i1 %cmp10, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
; 64-bit induction variable: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32* %b, i64 %indvars.iv
|
||||
%1 = load i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
%arrayidx4 = getelementptr inbounds i32* %c, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx4, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Exit test: the incremented index is truncated to i32 and compared for
; equality with %size.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %size
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret i32* %c
|
||||
}
|
42
test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll
Normal file
42
test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; Loop-vectorizer test for the arm64-none-linux-gnu triple with +neon.
; The expected output contains the vector body twice (see the directives
; inside @array_add): four vector loads, two vector adds, two vector stores.
; RUN: opt < %s -loop-vectorize -mtriple=arm64-none-linux-gnu -mattr=+neon -S | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; array_add: scalar input loop for the vectorizer. When %size > 0 it stores
; a[i] + b[i] into c[i] (i32 elements) for i = 0 .. %size-1; it always
; returns %c.
; The FileCheck directives below require, in order, four <4 x i32> loads, two
; <4 x i32> "add nsw" instructions and two <4 x i32> stores before the ret,
; i.e. two vector copies of the loop body (two input arrays, one output).
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
|
||||
;CHECK-LABEL: array_add
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: add nsw <4 x i32>
|
||||
;CHECK: add nsw <4 x i32>
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: ret
|
||||
entry:
|
||||
; Signed guard: the loop is skipped entirely unless %size > 0.
%cmp10 = icmp sgt i32 %size, 0
|
||||
br i1 %cmp10, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
; 64-bit induction variable: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32* %b, i64 %indvars.iv
|
||||
%1 = load i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
%arrayidx4 = getelementptr inbounds i32* %c, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx4, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Exit test: the incremented index is truncated to i32 and compared for
; equality with %size.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %size
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret i32* %c
|
||||
}
|
Loading…
Reference in New Issue
Block a user