LoopVectorize: Enable vectorization of the fmuladd intrinsic

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171076 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2012-12-25 23:21:29 +00:00
parent 64a7a24edf
commit 1d59f5fa53
2 changed files with 61 additions and 0 deletions

View File

@ -711,6 +711,7 @@ isTriviallyVectorizableIntrinsic(Instruction *Inst) {
case Intrinsic::nearbyint:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
return true;
default:
return false;

View File

@ -788,6 +788,66 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.fma.f64(double, double, double) nounwind readnone
;CHECK: @fmuladd_f32
;CHECK: llvm.fmuladd.v4f32
;CHECK: ret void
; Checks that LoopVectorize widens llvm.fmuladd.f32 into llvm.fmuladd.v4f32
; (see the CHECK lines above).  Equivalent C:
;   for (i = 0; i < n; ++i) x[i] = fmuladd(y[i], z[i], w[i]);
; NOTE(review): old typed-pointer IR (pre-LLVM 3.7).  The exact instruction
; sequence is the test fixture -- do not restructure it.
define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
entry:
%cmp12 = icmp sgt i32 %n, 0 ; skip the loop entirely when n <= 0
br i1 %cmp12, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i, already widened to i64
%arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
%0 = load float* %arrayidx, align 4, !tbaa !0 ; y[i]  (!0 defined elsewhere in the file)
%arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
%1 = load float* %arrayidx2, align 4, !tbaa !0 ; w[i]
%arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
%2 = load float* %arrayidx4, align 4, !tbaa !0 ; z[i]
%3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1) ; y[i]*z[i] + w[i]
%arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
store float %3, float* %arrayidx6, align 4, !tbaa !0 ; x[i] = result
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n ; exit once i+1 == n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
;CHECK: @fmuladd_f64
;CHECK: llvm.fmuladd.v4f64
;CHECK: ret void
; Double-precision twin of the f32 test: checks that LoopVectorize widens
; llvm.fmuladd.f64 into llvm.fmuladd.v4f64 (see the CHECK lines above).
; Equivalent C:
;   for (i = 0; i < n; ++i) x[i] = fmuladd(y[i], z[i], w[i]);
; NOTE(review): old typed-pointer IR (pre-LLVM 3.7).  The exact instruction
; sequence is the test fixture -- do not restructure it.
define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
entry:
%cmp12 = icmp sgt i32 %n, 0 ; skip the loop entirely when n <= 0
br i1 %cmp12, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i, already widened to i64
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
%0 = load double* %arrayidx, align 8, !tbaa !3 ; y[i]  (!3 defined elsewhere in the file)
%arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
%1 = load double* %arrayidx2, align 8, !tbaa !3 ; w[i]
%arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
%2 = load double* %arrayidx4, align 8, !tbaa !3 ; z[i]
%3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1) ; y[i]*z[i] + w[i]
%arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
store double %3, double* %arrayidx6, align 8, !tbaa !3 ; x[i] = result
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n ; exit once i+1 == n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
;CHECK: @pow_f32
;CHECK: llvm.pow.v4f32
;CHECK: ret void