From d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21a Mon Sep 17 00:00:00 2001 From: Robert Lytton Date: Wed, 18 Sep 2013 12:43:35 +0000 Subject: [PATCH] Prevent LoopVectorizer and SLPVectorizer running if the target has no vector registers. XCore target: Add XCoreTargetTransformInfo This is where getNumberOfRegisters() resides, which in turn returns the number of vector registers (=0). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCore.h | 2 + lib/Target/XCore/XCoreTargetMachine.cpp | 8 ++ lib/Target/XCore/XCoreTargetMachine.h | 2 + lib/Target/XCore/XCoreTargetTransformInfo.cpp | 85 +++++++++++++++++++ lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++ lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++ .../BBVectorize/xcore/no-vector-registers.ll | 18 ++++ .../xcore/no-vector-registers.ll | 23 +++++ .../xcore/no-vector-registers.ll | 24 ++++++ 9 files changed, 172 insertions(+) create mode 100644 lib/Target/XCore/XCoreTargetTransformInfo.cpp create mode 100644 test/Transforms/BBVectorize/xcore/no-vector-registers.ll create mode 100644 test/Transforms/LoopVectorize/xcore/no-vector-registers.ll create mode 100644 test/Transforms/SLPVectorizer/xcore/no-vector-registers.ll diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 2f375fc952c..73c310be034 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -31,6 +31,8 @@ namespace llvm { CodeGenOpt::Level OptLevel); ModulePass *createXCoreLowerThreadLocalPass(); + ImmutablePass *createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM); + } // end namespace llvm; #endif diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 3ef1520c71a..9ae0b860dff 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -70,3 +70,11 @@ bool XCorePassConfig::addInstSelector() { extern "C" void LLVMInitializeXCoreTarget() { 
RegisterTargetMachine X(TheXCoreTarget); } + +void XCoreTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our XCore pass. This + // allows the XCore pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(this)); + PM.add(createXCoreTargetTransformInfoPass(this)); +} diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index eb9a1aa420e..a19a67727f2 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -57,6 +57,8 @@ public: // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + + virtual void addAnalysisPasses(PassManagerBase &PM); }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp new file mode 100644 index 00000000000..48621388348 --- /dev/null +++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp @@ -0,0 +1,85 @@ +//===-- XCoreTargetTransformInfo.cpp - XCore specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// XCore target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcoretti" +#include "XCore.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeXCoreTTIPass(PassRegistry &); +} + +namespace { + +class XCoreTTI : public ImmutablePass, public TargetTransformInfo { + const XCoreTargetMachine *TM; + +public: + XCoreTTI() : ImmutablePass(ID), TM(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + XCoreTTI(const XCoreTargetMachine *TM) + : ImmutablePass(ID), TM(TM) { + initializeXCoreTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + static char ID; + + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + return 0; + } + return 12; + } +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(XCoreTTI, TargetTransformInfo, "xcoretti", + "XCore Target Transform Info", true, true, false) +char XCoreTTI::ID = 0; + + +ImmutablePass * +llvm::createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM) { + return new XCoreTTI(TM); +} diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1d82c7b8f54..30908c8ebf3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ 
b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -909,6 +909,11 @@ struct LoopVectorize : public LoopPass { DT = &getAnalysis(); TLI = getAnalysisIfAvailable(); + // If the target claims to have no vector registers don't attempt + // vectorization. + if (!TTI->getNumberOfRegisters(true)) + return false; + if (DL == NULL) { DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); return false; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b287ca7c8d5..cd3f723cd3e 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1572,6 +1572,11 @@ struct SLPVectorizer : public FunctionPass { StoreRefs.clear(); bool Changed = false; + // If the target claims to have no vector registers don't attempt + // vectorization. + if (!TTI->getNumberOfRegisters(true)) + return false; + // Must have DataLayout. We can't require it because some tests run w/o // triple. if (!DL) diff --git a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll new file mode 100644 index 00000000000..9ebdb7368a3 --- /dev/null +++ b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" + +; Basic depth-3 chain +define double @test1(double %A1, double %A2, double %B1, double %B2) { +; CHECK-LABEL: @test1( +; CHECK-NOT: <2 x double> + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R +} diff --git a/test/Transforms/LoopVectorize/xcore/no-vector-registers.ll 
b/test/Transforms/LoopVectorize/xcore/no-vector-registers.ll new file mode 100644 index 00000000000..a099daa740e --- /dev/null +++ b/test/Transforms/LoopVectorize/xcore/no-vector-registers.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" +; The xcore target has no vector registers, so loop should not be vectorized. +;CHECK-LABEL: @f( +;CHECK: entry: +;CHECK-NOT: vector.body +;CHECK-NEXT: br label %do.body +define void @f(i8* nocapture %ptr, i32 %len) { +entry: + br label %do.body +do.body: + %ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ] + %len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ] + %incdec.ptr = getelementptr inbounds i8* %ptr.addr.0, i32 1 + store i8 0, i8* %ptr.addr.0, align 1 + %dec = add nsw i32 %len.addr.0, -1 + %tobool = icmp eq i32 %len.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body +do.end: + ret void +} diff --git a/test/Transforms/SLPVectorizer/xcore/no-vector-registers.ll b/test/Transforms/SLPVectorizer/xcore/no-vector-registers.ll new file mode 100644 index 00000000000..66392e74cb3 --- /dev/null +++ b/test/Transforms/SLPVectorizer/xcore/no-vector-registers.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" + +; Simple 3-pair chain with loads and stores +; CHECK: test1 +; CHECK-NOT: <2 x double> +define void @test1(double* %a, double* %b, double* %c) { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr 
inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} +