diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4b583b3a7d..d4bdd75fa82 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -800,6 +800,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // Note that ppc_altivec_vperm has a big-endian bias, so when creating + // a vectorshuffle for little endian, we must undo the transformation + // performed on vec_perm in altivec.h. That is, we must complement + // the permutation mask with respect to 31 and reverse the order of + // V1 and V2. if (Constant *Mask = dyn_cast(II->getArgOperand(2))) { assert(Mask->getType()->getVectorNumElements() == 16 && "Bad type for intrinsic!"); @@ -832,10 +837,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned Idx = cast(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. + if (DL && DL->isLittleEndian()) + Idx = 31 - Idx; if (!ExtractedElts[Idx]) { + Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, Builder->getInt32(Idx&15)); }