From 0ff8a4fa35db853dfc3dbe54535aa326d8e742a2 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 31 Mar 2011 22:57:29 +0000 Subject: [PATCH] InstCombine optimization: extractelement(cast) -> cast(extractelement) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128683 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineVectorOps.cpp | 10 ++++++- test/Transforms/InstCombine/ExtractCast.ll | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/InstCombine/ExtractCast.ll diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5caa12dfdfa..ad6a8d054ee 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } diff --git a/test/Transforms/InstCombine/ExtractCast.ll b/test/Transforms/InstCombine/ExtractCast.ll new file mode 100644 index 00000000000..5ebbefd97b3 --- /dev/null +++ b/test/Transforms/InstCombine/ExtractCast.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -instcombine -S -o - | FileCheck %s + +; CHECK: @a +define i32 @a(<4 x i64> %I) { +entry: +; CHECK-NOT: trunc <4 x i64> + %J = trunc <4 x i64> %I to <4 x i32> + %K = extractelement <4 x i32> %J, i32 3 +; CHECK: extractelement <4 x i64> +; CHECK: trunc i64 +; 
CHECK: ret + ret i32 %K +} + + +; CHECK: @b +define i32 @b(<4 x float> %I) { +entry: +; CHECK-NOT: fptosi <4 x float> + %J = fptosi <4 x float> %I to <4 x i32> + %K = extractelement <4 x i32> %J, i32 3 +; CHECK: extractelement <4 x float> +; CHECK: fptosi float +; CHECK: ret + ret i32 %K +} +