From db0a73f31bddcb442a672be4de598ba432acc6f8 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 21 Apr 2014 22:06:04 +0000 Subject: [PATCH] Simplify a vpermil* with constant mask. With a constant mask a vpermil* is just a shufflevector. This patch implements that simplification. This allows us to produce denser code. It should also allow more folding down the line. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206801 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCalls.cpp | 15 ++++++++++ .../InstCombine/vec_demanded_elts.ll | 30 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 19e28fe393f..d9b5da3ddc4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -577,6 +577,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_avx_vpermilvar_ps: + case Intrinsic::x86_avx_vpermilvar_ps_256: + case Intrinsic::x86_avx_vpermilvar_pd: + case Intrinsic::x86_avx_vpermilvar_pd_256: { + // Convert vpermil* to shufflevector if the mask is constant. + Value *V = II->getArgOperand(1); + if (auto C = dyn_cast(V)) { + auto V1 = II->getArgOperand(0); + auto V2 = UndefValue::get(V1->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, C); + return ReplaceInstUsesWith(CI, Shuffle); + } + break; + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (Constant *Mask = dyn_cast(II->getArgOperand(2))) { diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll index d12412a9297..f75ea032735 100644 --- a/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -209,4 +209,34 @@ define <4 x float> @test_select(float %f, float %g) { ret <4 x float> %ret } +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) +define <4 x float> @test_vpermilvar_ps(<4 x float> %v) { +; CHECK-LABEL: @test_vpermilvar_ps( +; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> + %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) + ret <4 x float> %a +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) +define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) { +; CHECK-LABEL: @test_vpermilvar_ps_256( +; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> + %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) + ret <8 x float> %a +} + +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i32>) +define <2 x double> @test_vpermilvar_pd(<2 x double> %v) { +; CHECK-LABEL: @test_vpermilvar_pd( +; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> + %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> ) + ret <2 x double> %a +} + +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>) +define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) { +; CHECK-LABEL: @test_vpermilvar_pd_256( +; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> + %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> ) + ret <4 x double> %a +}