From 1d59f5fa53cac23b6debc1d7214451c65b0399a7 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 25 Dec 2012 23:21:29 +0000 Subject: [PATCH] LoopVectorize: Enable vectorization of the fmuladd intrinsic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171076 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 1 + test/Transforms/LoopVectorize/intrinsic.ll | 60 ++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index d571903984c..b8b934a07d3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -711,6 +711,7 @@ isTriviallyVectorizableIntrinsic(Instruction *Inst) { case Intrinsic::nearbyint: case Intrinsic::pow: case Intrinsic::fma: + case Intrinsic::fmuladd: return true; default: return false; diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 54e3c69fe1a..e032041bc09 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -788,6 +788,66 @@ for.end: ; preds = %for.body, %entry declare double @llvm.fma.f64(double, double, double) nounwind readnone +;CHECK: @fmuladd_f32 +;CHECK: llvm.fmuladd.v4f32 +;CHECK: ret void +define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable { +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !tbaa !0 + %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv + %2 = load float* %arrayidx4, align 4, !tbaa !0 + %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1) + %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %3, float* %arrayidx6, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone + +;CHECK: @fmuladd_f64 +;CHECK: llvm.fmuladd.v4f64 +;CHECK: ret void +define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable { +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8, !tbaa !3 + %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv + %1 = load double* %arrayidx2, align 8, !tbaa !3 + %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv + %2 = load double* %arrayidx4, align 8, !tbaa !3 + %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1) + %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %3, double* %arrayidx6, align 8, !tbaa !3 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 
+ %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone + ;CHECK: @pow_f32 ;CHECK: llvm.pow.v4f32 ;CHECK: ret void
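Note (not part of the upstream patch): a minimal C sketch of the kind of source loop this change targets. With FP contraction enabled (for example clang -O2 -ffp-contract=fast, or #pragma STDC FP_CONTRACT ON), the multiply-add below is typically lowered to @llvm.fmuladd.f32; after this commit the loop vectorizer can widen that call to @llvm.fmuladd.v4f32, which is exactly what the new @fmuladd_f32 test checks. The compiler flags and exact codegen are assumptions for illustration, not something the patch itself specifies.

/* Illustrative only: mirrors the @fmuladd_f32 test above.
 * Assumes the contracted a*b+c is emitted as @llvm.fmuladd.f32
 * (e.g. under -ffp-contract=fast); flags are not part of the patch. */
void fmuladd_f32(int n, float *restrict y, float *restrict x,
                 float *restrict z, float *restrict w) {
  for (int i = 0; i < n; ++i)
    x[i] = y[i] * z[i] + w[i];  /* contracted to fmuladd(y[i], z[i], w[i]) */
}

The double-precision case follows the same pattern and is covered by the @fmuladd_f64 test, which expects @llvm.fmuladd.v4f64 in the vectorized loop body.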