From 4ca0ce2594a3e3d420be548cb3b6e4e8f51e229f Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 2 Sep 2013 06:40:09 +0000 Subject: [PATCH] llvm interpreter: select, shuffle and insertelement instructions. This patch implements vector support for select instruction and adds specific vector instructions : shuffle and insertelement. (tests are also included) and functions lle_X_memset, lle_X_memcpy added. Done by Veselov, Yuri (mailto:Yuri.Veselov@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189735 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/Interpreter/Execution.cpp | 129 +++++++++++++++++- .../Interpreter/ExternalFunctions.cpp | 32 ++++- lib/ExecutionEngine/Interpreter/Interpreter.h | 3 + .../test-interp-vec-insertelement.ll | 41 ++++++ .../ExecutionEngine/test-interp-vec-select.ll | 118 ++++++++++++++++ .../test-interp-vec-shuffle.ll | 81 +++++++++++ 6 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 test/ExecutionEngine/test-interp-vec-insertelement.ll create mode 100644 test/ExecutionEngine/test-interp-vec-select.ll create mode 100644 test/ExecutionEngine/test-interp-vec-shuffle.ll diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index fc3d579d971..e02ba15ab10 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -786,20 +786,31 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { } static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, - GenericValue Src3) { - return Src1.IntVal == 0 ? 
Src3 : Src2; + GenericValue Src3, const Type *Ty) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + assert(Src2.AggregateVal.size() == Src3.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for (size_t i = 0; i < Src1.AggregateVal.size(); ++i) + Dest.AggregateVal[i] = (Src1.AggregateVal[i].IntVal == 0) ? + Src3.AggregateVal[i] : Src2.AggregateVal[i]; + } else { + Dest = (Src1.IntVal == 0) ? Src3 : Src2; + } + return Dest; } void Interpreter::visitSelectInst(SelectInst &I) { ExecutionContext &SF = ECStack.back(); + const Type * Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Src3 = getOperandValue(I.getOperand(2), SF); - GenericValue R = executeSelectInst(Src1, Src2, Src3); + GenericValue R = executeSelectInst(Src1, Src2, Src3, Ty); SetValue(&I, R, SF); } - //===----------------------------------------------------------------------===// // Terminator Instruction Implementations //===----------------------------------------------------------------------===// @@ -1793,10 +1804,115 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) { SetValue(&I, Dest, SF); } +void Interpreter::visitInsertElementInst(InsertElementInst &I) { + ExecutionContext &SF = ECStack.back(); + Type *Ty = I.getType(); + + if(!(Ty->isVectorTy()) ) + llvm_unreachable("Unhandled dest type for insertelement instruction"); + + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue Dest; + + Type *TyContained = Ty->getContainedType(0); + + const unsigned indx = unsigned(Src3.IntVal.getZExtValue()); + Dest.AggregateVal = Src1.AggregateVal; + + if(Src1.AggregateVal.size() <= indx) + llvm_unreachable("Invalid index in insertelement instruction"); + 
switch (TyContained->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for insertelement instruction"); + case Type::IntegerTyID: + Dest.AggregateVal[indx].IntVal = Src2.IntVal; + break; + case Type::FloatTyID: + Dest.AggregateVal[indx].FloatVal = Src2.FloatVal; + break; + case Type::DoubleTyID: + Dest.AggregateVal[indx].DoubleVal = Src2.DoubleVal; + break; + } + SetValue(&I, Dest, SF); +} + +void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){ + ExecutionContext &SF = ECStack.back(); + + Type *Ty = I.getType(); + if(!(Ty->isVectorTy())) + llvm_unreachable("Unhandled dest type for shufflevector instruction"); + + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue Dest; + + // There is no need to check types of src1 and src2, because the compiled + // bytecode can't contain different types for src1 and src2 for a + // shufflevector instruction. 
+ + Type *TyContained = Ty->getContainedType(0); + unsigned src1Size = (unsigned)Src1.AggregateVal.size(); + unsigned src2Size = (unsigned)Src2.AggregateVal.size(); + unsigned src3Size = (unsigned)Src3.AggregateVal.size(); + + Dest.AggregateVal.resize(src3Size); + + switch (TyContained->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for insertelement instruction"); + break; + case Type::IntegerTyID: + for( unsigned i=0; i , <2 x i32> undef, + // <2 x i32> < i32 0, i32 5 >, + // where i32 5 is invalid, but let it be additional check here: + llvm_unreachable("Invalid mask in shufflevector instruction"); + } + break; + case Type::FloatTyID: + for( unsigned i=0; igetOpcode()) { - case Instruction::Trunc: + case Instruction::Trunc: return executeTruncInst(CE->getOperand(0), CE->getType(), SF); case Instruction::ZExt: return executeZExtInst(CE->getOperand(0), CE->getType(), SF); @@ -1832,7 +1948,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, case Instruction::Select: return executeSelectInst(getOperandValue(CE->getOperand(0), SF), getOperandValue(CE->getOperand(1), SF), - getOperandValue(CE->getOperand(2), SF)); + getOperandValue(CE->getOperand(2), SF), + CE->getOperand(0)->getType()); default : break; } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index bef4bbf6602..6fb797347a8 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -406,6 +406,7 @@ GenericValue lle_X_sprintf(FunctionType *FT, break; } } + return GV; } // int printf(const char *, ...) 
- a very rough implementation to make output @@ -434,7 +435,7 @@ GenericValue lle_X_sscanf(FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9])); + Args[5], Args[6], Args[7], Args[8], Args[9])); return GV; } @@ -450,7 +451,7 @@ GenericValue lle_X_scanf(FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9])); + Args[5], Args[6], Args[7], Args[8], Args[9])); return GV; } @@ -470,6 +471,31 @@ GenericValue lle_X_fprintf(FunctionType *FT, return GV; } +GenericValue lle_X_memset(FunctionType *FT, + const std::vector<GenericValue> &Args) { + int val = (int)Args[1].IntVal.getSExtValue(); + size_t len = (size_t)Args[2].IntVal.getZExtValue(); + memset((void*)GVTOP(Args[0]),val, len); + // llvm.memset.* returns void, lle_X_* returns GenericValue, + // so here we return GenericValue with IntVal set to zero + GenericValue GV; + GV.IntVal = 0; + return GV; +} + +GenericValue lle_X_memcpy(FunctionType *FT, + const std::vector<GenericValue> &Args) { + + memcpy(GVTOP(Args[0]), GVTOP(Args[1]), + (size_t)(Args[2].IntVal.getLimitedValue())); + + // llvm.memcpy.* returns void, lle_X_* returns GenericValue, + // so here we return GenericValue with IntVal set to zero + GenericValue GV; + GV.IntVal = 0; + return GV; +} + void Interpreter::initializeExternalFunctions() { sys::ScopedLock Writer(*FunctionsLock); FuncNames["lle_X_atexit"] = lle_X_atexit; @@ -481,4 +507,6 @@ void Interpreter::initializeExternalFunctions() { FuncNames["lle_X_sscanf"] = lle_X_sscanf; FuncNames["lle_X_scanf"] = lle_X_scanf; FuncNames["lle_X_fprintf"] = lle_X_fprintf; + FuncNames["lle_X_memset"] = lle_X_memset; + FuncNames["lle_X_memcpy"] = lle_X_memcpy; } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 2952d7eabe2..802b53e2476 100644 --- 
a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -179,6 +179,9 @@ public: void visitVAArgInst(VAArgInst &I); void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitShuffleVectorInst(ShuffleVectorInst &I); + void visitInstruction(Instruction &I) { errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); diff --git a/test/ExecutionEngine/test-interp-vec-insertelement.ll b/test/ExecutionEngine/test-interp-vec-insertelement.ll new file mode 100644 index 00000000000..814b90533d0 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-insertelement.ll @@ -0,0 +1,41 @@ + ; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + %v0 = insertelement <2 x i8> zeroinitializer, i8 1, i32 1 + %v1 = insertelement <3 x i8> zeroinitializer, i8 2, i32 2 + %v2 = insertelement <4 x i8> zeroinitializer, i8 3, i32 3 + %v3 = insertelement <8 x i8> zeroinitializer, i8 4, i32 4 + %v4 = insertelement <16 x i8> zeroinitializer, i8 5, i32 7 + + %v5 = insertelement <2 x i16> zeroinitializer, i16 1, i32 1 + %v6 = insertelement <3 x i16> zeroinitializer, i16 2, i32 2 + %v7 = insertelement <4 x i16> zeroinitializer, i16 3, i32 3 + %v8 = insertelement <8 x i16> zeroinitializer, i16 4, i32 4 + %v9 = insertelement <16 x i16> zeroinitializer, i16 5, i32 7 + + %v10 = insertelement <2 x i32> zeroinitializer, i32 1, i32 1 + %v11 = insertelement <3 x i32> zeroinitializer, i32 2, i32 2 + %v12 = insertelement <4 x i32> zeroinitializer, i32 3, i32 3 + %v13 = insertelement <8 x i32> zeroinitializer, i32 4, i32 4 + %v14 = insertelement <16 x i32> zeroinitializer, i32 5, i32 7 + + %v15 = insertelement <2 x i64> zeroinitializer, i64 1, i32 1 + %v16 = insertelement <3 x i64> zeroinitializer, i64 2, i32 2 + %v17 = insertelement <4 x i64> zeroinitializer, i64 3, i32 3 + %v18 = insertelement <8 x i64> zeroinitializer, i64 4, i32 4 + %v19 = insertelement <16 x 
i64> zeroinitializer, i64 5, i32 7 + + %v20 = insertelement <2 x float> zeroinitializer, float 1.0, i32 1 + %v21 = insertelement <3 x float> zeroinitializer, float 2.0, i32 2 + %v22 = insertelement <4 x float> zeroinitializer, float 3.0, i32 3 + %v23 = insertelement <8 x float> zeroinitializer, float 4.0, i32 4 + %v24 = insertelement <16 x float> zeroinitializer, float 5.0, i32 7 + + %v25 = insertelement <2 x double> zeroinitializer, double 1.0, i32 1 + %v26 = insertelement <3 x double> zeroinitializer, double 2.0, i32 2 + %v27 = insertelement <4 x double> zeroinitializer, double 3.0, i32 3 + %v28 = insertelement <8 x double> zeroinitializer, double 4.0, i32 4 + %v29 = insertelement <16 x double> zeroinitializer, double 5.0, i32 7 + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-select.ll b/test/ExecutionEngine/test-interp-vec-select.ll new file mode 100644 index 00000000000..ce086e408dd --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-select.ll @@ -0,0 +1,118 @@ +; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + + ; Vector values + %a2_i8 = add <2 x i8> zeroinitializer, + %a3_i8 = add <3 x i8> zeroinitializer, + %a4_i8 = add <4 x i8> zeroinitializer, + %a8_i8 = add <8 x i8> zeroinitializer, + %a16_i8 = add <16 x i8> zeroinitializer, + + %a2_i16 = add <2 x i16> zeroinitializer, + %a3_i16 = add <3 x i16> zeroinitializer, + %a4_i16 = add <4 x i16> zeroinitializer, + %a8_i16 = add <8 x i16> zeroinitializer, + %a16_i16 = add <16 x i16> zeroinitializer, + + %a2_i32 = add <2 x i32> zeroinitializer, + %a3_i32 = add <3 x i32> zeroinitializer, + %a4_i32 = add <4 x i32> zeroinitializer, + %a8_i32 = add <8 x i32> zeroinitializer, + %a16_i32 = add <16 x i32> zeroinitializer, + + %a2_i64 = add <2 x i64> zeroinitializer, + %a3_i64 = add <3 x i64> zeroinitializer, + %a4_i64 = add <4 x i64> zeroinitializer, + %a8_i64 = add <8 x i64> zeroinitializer, + %a16_i64 = add <16 x i64> zeroinitializer, + + %a2_float = fadd <2 x float> 
zeroinitializer, + %a3_float = fadd <3 x float> zeroinitializer, + %a4_float = fadd <4 x float> zeroinitializer, + %a8_float = fadd <8 x float> zeroinitializer, + %a16_float = fadd <16 x float> zeroinitializer, + + %a2_double = fadd <2 x double> zeroinitializer, + %a3_double = fadd <3 x double> zeroinitializer, + %a4_double = fadd <4 x double> zeroinitializer, + %a8_double = fadd <8 x double> zeroinitializer, + %a16_double = fadd <16 x double> zeroinitializer, + + %b2_i8 = sub <2 x i8> zeroinitializer, %a2_i8 + %b3_i8 = sub <3 x i8> zeroinitializer, %a3_i8 + %b4_i8 = sub <4 x i8> zeroinitializer, %a4_i8 + %b8_i8 = sub <8 x i8> zeroinitializer, %a8_i8 + %b16_i8 = sub <16 x i8> zeroinitializer, %a16_i8 + + %b2_i16 = sub <2 x i16> zeroinitializer, %a2_i16 + %b3_i16 = sub <3 x i16> zeroinitializer, %a3_i16 + %b4_i16 = sub <4 x i16> zeroinitializer, %a4_i16 + %b8_i16 = sub <8 x i16> zeroinitializer, %a8_i16 + %b16_i16 = sub <16 x i16> zeroinitializer, %a16_i16 + + %b2_i32 = sub <2 x i32> zeroinitializer, %a2_i32 + %b3_i32 = sub <3 x i32> zeroinitializer, %a3_i32 + %b4_i32 = sub <4 x i32> zeroinitializer, %a4_i32 + %b8_i32 = sub <8 x i32> zeroinitializer, %a8_i32 + %b16_i32 = sub <16 x i32> zeroinitializer, %a16_i32 + + %b2_i64 = sub <2 x i64> zeroinitializer, %a2_i64 + %b3_i64 = sub <3 x i64> zeroinitializer, %a3_i64 + %b4_i64 = sub <4 x i64> zeroinitializer, %a4_i64 + %b8_i64 = sub <8 x i64> zeroinitializer, %a8_i64 + %b16_i64 = sub <16 x i64> zeroinitializer, %a16_i64 + + %b2_float = fsub <2 x float> zeroinitializer, %a2_float + %b3_float = fsub <3 x float> zeroinitializer, %a3_float + %b4_float = fsub <4 x float> zeroinitializer, %a4_float + %b8_float = fsub <8 x float> zeroinitializer, %a8_float + %b16_float = fsub <16 x float> zeroinitializer, %a16_float + + %b2_double = fsub <2 x double> zeroinitializer, %a2_double + %b3_double = fsub <3 x double> zeroinitializer, %a3_double + %b4_double = fsub <4 x double> zeroinitializer, %a4_double + %b8_double = fsub <8 x 
double> zeroinitializer, %a8_double + %b16_double = fsub <16 x double> zeroinitializer, %a16_double + + + + %v0 = select <2 x i1> , <2 x i8> %a2_i8, <2 x i8> %b2_i8 + %v1 = select <3 x i1> , <3 x i8> %a3_i8, <3 x i8> %b3_i8 + %v2 = select <4 x i1> , <4 x i8> %a4_i8, <4 x i8> %b4_i8 + %v3 = select <8 x i1> , <8 x i8> %a8_i8, <8 x i8> %b8_i8 + %v4 = select <16 x i1> , <16 x i8> %a16_i8, <16 x i8> %b16_i8 + + %v5 = select <2 x i1> , <2 x i16> %a2_i16, <2 x i16> %b2_i16 + %v6 = select <3 x i1> , <3 x i16> %a3_i16, <3 x i16> %b3_i16 + %v7 = select <4 x i1> , <4 x i16> %a4_i16, <4 x i16> %b4_i16 + %v8 = select <8 x i1> , <8 x i16> %a8_i16, <8 x i16> %b8_i16 + %v9 = select <16 x i1> , <16 x i16> %a16_i16, <16 x i16> %b16_i16 + + %v10 = select <2 x i1> , <2 x i32> %a2_i32, <2 x i32> %b2_i32 + %v11 = select <3 x i1> , <3 x i32> %a3_i32, <3 x i32> %b3_i32 + %v12 = select <4 x i1> , <4 x i32> %a4_i32, <4 x i32> %b4_i32 + %v13 = select <8 x i1> , <8 x i32> %a8_i32, <8 x i32> %b8_i32 + %v14 = select <16 x i1> , <16 x i32> %a16_i32, <16 x i32> %b16_i32 + + %v15 = select <2 x i1> , <2 x i64> %a2_i64, <2 x i64> %b2_i64 + %v16 = select <3 x i1> , <3 x i64> %a3_i64, <3 x i64> %b3_i64 + %v17 = select <4 x i1> , <4 x i64> %a4_i64, <4 x i64> %b4_i64 + %v18 = select <8 x i1> , <8 x i64> %a8_i64, <8 x i64> %b8_i64 + %v19 = select <16 x i1> , <16 x i64> %a16_i64, <16 x i64> %b16_i64 + + %v20 = select <2 x i1> , <2 x float> %a2_float, <2 x float> %b2_float + %v21 = select <3 x i1> , <3 x float> %a3_float, <3 x float> %b3_float + %v22 = select <4 x i1> , <4 x float> %a4_float, <4 x float> %b4_float + %v23 = select <8 x i1> , <8 x float> %a8_float, <8 x float> %b8_float + %v24 = select <16 x i1> , <16 x float> %a16_float, <16 x float> %b16_float + + %v25 = select <2 x i1> , <2 x double> %a2_double, <2 x double> %b2_double + %v26 = select <3 x i1> , <3 x double> %a3_double, <3 x double> %b3_double + %v27 = select <4 x i1> , <4 x double> %a4_double, <4 x double> %b4_double + %v28 = select <8 x 
i1> , <8 x double> %a8_double, <8 x double> %b8_double + %v29 = select <16 x i1> , <16 x double> %a16_double, <16 x double> %b16_double + + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-shuffle.ll b/test/ExecutionEngine/test-interp-vec-shuffle.ll new file mode 100644 index 00000000000..e55fa99e3bb --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-shuffle.ll @@ -0,0 +1,81 @@ +; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + + ; Vector values + %a2_i8 = add <2 x i8> zeroinitializer, + %a3_i8 = add <3 x i8> zeroinitializer, + %a4_i8 = add <4 x i8> zeroinitializer, + %a8_i8 = add <8 x i8> zeroinitializer, + %a16_i8 = add <16 x i8> zeroinitializer, + + %a2_i16 = add <2 x i16> zeroinitializer, + %a3_i16 = add <3 x i16> zeroinitializer, + %a4_i16 = add <4 x i16> zeroinitializer, + %a8_i16 = add <8 x i16> zeroinitializer, + %a16_i16 = add <16 x i16> zeroinitializer, + + %a2_i32 = add <2 x i32> zeroinitializer, + %a3_i32 = add <3 x i32> zeroinitializer, + %a4_i32 = add <4 x i32> zeroinitializer, + %a8_i32 = add <8 x i32> zeroinitializer, + %a16_i32 = add <16 x i32> zeroinitializer, + + %a2_i64 = add <2 x i64> zeroinitializer, + %a3_i64 = add <3 x i64> zeroinitializer, + %a4_i64 = add <4 x i64> zeroinitializer, + %a8_i64 = add <8 x i64> zeroinitializer, + %a16_i64 = add <16 x i64> zeroinitializer, + + %a2_float = fadd <2 x float> zeroinitializer, + %a3_float = fadd <3 x float> zeroinitializer, + %a4_float = fadd <4 x float> zeroinitializer, + %a8_float = fadd <8 x float> zeroinitializer, + %a16_float = fadd <16 x float> zeroinitializer, + + %a2_double = fadd <2 x double> zeroinitializer, + %a3_double = fadd <3 x double> zeroinitializer, + %a4_double = fadd <4 x double> zeroinitializer, + %a8_double = fadd <8 x double> zeroinitializer, + %a16_double = fadd <16 x double> zeroinitializer, + + + %v0 = shufflevector <2 x i8> %a2_i8, <2 x i8>undef, <2 x i32> + %v1 = shufflevector <3 x i8> %a3_i8, <3 x i8>undef, <3 x i32> + %v2 
= shufflevector <4 x i8> %a4_i8, <4 x i8>undef, <4 x i32> + %v3 = shufflevector <8 x i8> %a8_i8, <8 x i8>undef, <8 x i32> + %v4 = shufflevector <16 x i8> %a16_i8, <16 x i8>undef, <16 x i32> + + %v5 = shufflevector <2 x i16> %a2_i16, <2 x i16>undef, <2 x i32> + %v6 = shufflevector <3 x i16> %a3_i16, <3 x i16>undef, <3 x i32> + %v7 = shufflevector <4 x i16> %a4_i16, <4 x i16>undef, <4 x i32> + %v8 = shufflevector <8 x i16> %a8_i16, <8 x i16>undef, <8 x i32> + %v9 = shufflevector <16 x i16> %a16_i16, <16 x i16>undef, <16 x i32> + + %v10 = shufflevector <2 x i32> %a2_i32, <2 x i32>undef, <2 x i32> + %v11 = shufflevector <3 x i32> %a3_i32, <3 x i32>undef, <3 x i32> + %v12 = shufflevector <4 x i32> %a4_i32, <4 x i32>undef, <4 x i32> + %v13 = shufflevector <8 x i32> %a8_i32, <8 x i32>undef, <8 x i32> + %v14 = shufflevector <16 x i32> %a16_i32, <16 x i32>undef, <16 x i32> + + %v15 = shufflevector <2 x i64> %a2_i64, <2 x i64>undef, <2 x i32> + %v16 = shufflevector <3 x i64> %a3_i64, <3 x i64>undef, <3 x i32> + %v17 = shufflevector <4 x i64> %a4_i64, <4 x i64>undef, <4 x i32> + %v18 = shufflevector <8 x i64> %a8_i64, <8 x i64>undef, <8 x i32> + %v19 = shufflevector <16 x i64> %a16_i64, <16 x i64>undef, <16 x i32> + + %v20 = shufflevector <2 x float> %a2_float, <2 x float>undef, <2 x i32> + %v21 = shufflevector <3 x float> %a3_float, <3 x float>undef, <3 x i32> + %v22 = shufflevector <4 x float> %a4_float, <4 x float>undef, <4 x i32> + %v23 = shufflevector <8 x float> %a8_float, <8 x float>undef, <8 x i32> + %v24 = shufflevector <16 x float> %a16_float, <16 x float>undef, <16 x i32> + + %v25 = shufflevector <2 x double> %a2_double, <2 x double>undef, <2 x i32> + %v26 = shufflevector <3 x double> %a3_double, <3 x double>undef, <3 x i32> + %v27 = shufflevector <4 x double> %a4_double, <4 x double>undef, <4 x i32> + %v28 = shufflevector <8 x double> %a8_double, <8 x double>undef, <8 x i32> + %v29 = shufflevector <16 x double> %a16_double, <16 x double>undef, <16 x i32> + + ret 
i32 0 +} +