From b09beed5408f859d8dabfb1a93766d2aedd19280 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 23 Dec 2013 14:45:00 +0000 Subject: [PATCH] Fix Scalarizer handling of vector GEPs with multiple index operands The old code only worked for one index operand. Also handle "inbounds". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197908 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/Scalarizer.cpp | 43 +++++++++++++++++++++------- test/Transforms/Scalarizer/basic.ll | 32 +++++++++++++++++++++ 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 36a01bd81df..33ccb7350d8 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -42,6 +42,8 @@ typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList; // component of a scattered vector or vector pointer. class Scatterer { public: + Scatterer() {} + // Scatter V into Size components. If new instructions are needed, // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. @@ -97,16 +99,6 @@ struct BinarySplitter { BinaryOperator &BO; }; -// GEPSpliiter()(Builder, X, Y, Name) uses Builder to create -// a single GEP called Name with operands X and Y. -struct GEPSplitter { - GEPSplitter() {} - Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, - const Twine &Name) const { - return Builder.CreateGEP(Op0, Op1, Name); - } -}; - // Information about a load or store that we're scalarizing. 
struct VectorLayout { VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {} @@ -429,7 +421,36 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { } bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { - return splitBinary(GEPI, GEPSplitter()); + VectorType *VT = dyn_cast<VectorType>(GEPI.getType()); + if (!VT) + return false; + + IRBuilder<> Builder(GEPI.getParent(), &GEPI); + unsigned NumElems = VT->getNumElements(); + unsigned NumIndices = GEPI.getNumIndices(); + + Scatterer Base = scatter(&GEPI, GEPI.getOperand(0)); + + SmallVector<Scatterer, 8> Ops; + Ops.resize(NumIndices); + for (unsigned I = 0; I < NumIndices; ++I) + Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1)); + + ValueVector Res; + Res.resize(NumElems); + for (unsigned I = 0; I < NumElems; ++I) { + SmallVector<Value *, 8> Indices; + Indices.resize(NumIndices); + for (unsigned J = 0; J < NumIndices; ++J) + Indices[J] = Ops[J][I]; + Res[I] = Builder.CreateGEP(Base[I], Indices, + GEPI.getName() + ".i" + Twine(I)); + if (GEPI.isInBounds()) + if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) + NewGEPI->setIsInBounds(); + } + gather(&GEPI, Res); + return true; } bool Scalarizer::visitCastInst(CastInst &CI) { diff --git a/test/Transforms/Scalarizer/basic.ll b/test/Transforms/Scalarizer/basic.ll index c8c1bc0260d..67e61057bc5 100644 --- a/test/Transforms/Scalarizer/basic.ll +++ b/test/Transforms/Scalarizer/basic.ll @@ -382,6 +382,38 @@ define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) { ret void } +; Test vector GEPs with more than one index. 
+define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i, + float *%other) { +; CHECK-LABEL: @f13( +; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float** +; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1 +; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2 +; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3 +; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0 +; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0 +; CHECK: %val.i0 = getelementptr inbounds [4 x float]* %ptr.i0, i32 0, i32 %i.i0 +; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1 +; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1 +; CHECK: %val.i1 = getelementptr inbounds [4 x float]* %ptr.i1, i32 1, i32 %i.i1 +; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2 +; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2 +; CHECK: %val.i2 = getelementptr inbounds [4 x float]* %ptr.i2, i32 2, i32 %i.i2 +; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3 +; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3 +; CHECK: %val.i3 = getelementptr inbounds [4 x float]* %ptr.i3, i32 3, i32 %i.i3 +; CHECK: store float* %val.i0, float** %dest.i0, align 32 +; CHECK: store float* %val.i1, float** %dest.i1, align 8 +; CHECK: store float* %val.i2, float** %dest.i2, align 16 +; CHECK: store float* %val.i3, float** %dest.i3, align 8 +; CHECK: ret void + %val = getelementptr inbounds <4 x [4 x float] *> %ptr, + <4 x i32> <i32 0, i32 1, i32 2, i32 3>, + <4 x i32> %i + store <4 x float *> %val, <4 x float *> *%dest + ret void +} + !0 = metadata !{ metadata !"root" } !1 = metadata !{ metadata !"set1", metadata !0 } !2 = metadata !{ metadata !"set2", metadata !0 }