From 73f8eff19e3b0d92ec41a4857b487e713b5e02b8 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 1 Sep 2014 18:44:57 +0000 Subject: [PATCH] Add a convenience method to copy wrapping, exact, and fast-math flags (NFC). The loop vectorizer preserves wrapping, exact, and fast-math properties of scalar instructions. This patch adds a convenience method to make that operation easier because we need to do this in the loop vectorizer, SLP vectorizer, and possibly other places. Although this is a 'no functional change' patch, I've added a testcase to verify that the exact flag is preserved by the loop vectorizer. The wrapping and fast-math flags are already checked in existing testcases. Differential Revision: http://reviews.llvm.org/D5138 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216886 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/InstrTypes.h | 4 ++++ lib/IR/Instructions.cpp | 16 +++++++++++++++ lib/Transforms/Vectorize/LoopVectorize.cpp | 16 +++------------ test/Transforms/LoopVectorize/exact.ll | 24 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 13 deletions(-) create mode 100644 test/Transforms/LoopVectorize/exact.ll diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 981aad852b2..799f684349f 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -358,6 +358,10 @@ public: /// isExact - Determine whether the exact flag is set. bool isExact() const; + /// Convenience method to copy wrapping, exact, and fast-math flag values + /// from V to this instruction. + void copyFlags(const Value *V); + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->isBinaryOp(); diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 9553252f4e9..16993f1bba5 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -2030,6 +2030,22 @@ bool BinaryOperator::isExact() const { return cast(this)->isExact(); } +void BinaryOperator::copyFlags(const Value *V) { + // Copy the wrapping flags. + if (auto *OB = dyn_cast(V)) { + setHasNoSignedWrap(OB->hasNoSignedWrap()); + setHasNoUnsignedWrap(OB->hasNoUnsignedWrap()); + } + + // Copy the exact flag. + if (auto *PE = dyn_cast(V)) + setIsExact(PE->isExact()); + + // Copy the fast-math flags. + if (auto *FP = dyn_cast(V)) + setFastMathFlags(FP->getFastMathFlags()); +} + //===----------------------------------------------------------------------===// // FPMathOperator Class //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5ca9106e652..ba330128788 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3248,19 +3248,9 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { for (unsigned Part = 0; Part < UF; ++Part) { Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); - // Update the NSW, NUW and Exact flags. Notice: V can be an Undef. - BinaryOperator *VecOp = dyn_cast(V); - if (VecOp && isa(BinOp)) { - VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); - VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); - } - if (VecOp && isa(VecOp)) - VecOp->setIsExact(BinOp->isExact()); - - // Copy the fast-math flags. - if (VecOp && isa(V)) - VecOp->setFastMathFlags(it->getFastMathFlags()); - + if (BinaryOperator *VecOp = dyn_cast(V)) + VecOp->copyFlags(BinOp); + Entry[Part] = V; } diff --git a/test/Transforms/LoopVectorize/exact.ll b/test/Transforms/LoopVectorize/exact.ll new file mode 100644 index 00000000000..0a8fbf33734 --- /dev/null +++ b/test/Transforms/LoopVectorize/exact.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; CHECK-LABEL: @lshr_exact( +; CHECK: lshr exact <4 x i32> +define void @lshr_exact(i32* %x) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %x, i64 %iv + %0 = load i32* %arrayidx, align 4 + %conv1 = lshr exact i32 %0, 1 + store i32 %conv1, i32* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +}