llvm interpreter: select, shuffle and insertelement instructions.

This patch implements vector support for select instruction and adds specific vector instructions : shuffle and insertelement. (tests are also included) and functions lle_X_memset, lle_X_memcpy added. Done by Veselov, Yuri (mailto:Yuri.Veselov@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189735 91177308-0d34-0410-b5e6-96231b3b80d8
2025-10-25 10:27:04 +00:00 · 2013-09-02 06:40:09 +00:00
parent ca6ead9f3f
commit 4ca0ce2594
6 changed files with 396 additions and 8 deletions
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -786,20 +786,31 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
 }

 static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
-                                      GenericValue Src3) {
-  return Src1.IntVal == 0 ? Src3 : Src2;
+                                      GenericValue Src3, const Type *Ty) {
+    GenericValue Dest;
+    if(Ty->isVectorTy()) {
+      assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+      assert(Src2.AggregateVal.size() == Src3.AggregateVal.size());
+      Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+      for (size_t i = 0; i < Src1.AggregateVal.size(); ++i)
+        Dest.AggregateVal[i] = (Src1.AggregateVal[i].IntVal == 0) ?
+          Src3.AggregateVal[i] : Src2.AggregateVal[i];
+    } else {
+      Dest = (Src1.IntVal == 0) ? Src3 : Src2;
+    }
+    return Dest;
 }

 void Interpreter::visitSelectInst(SelectInst &I) {
  ExecutionContext &SF = ECStack.back();
+  const Type * Ty = I.getOperand(0)->getType();
  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
  GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
-  GenericValue R = executeSelectInst(Src1, Src2, Src3);
+  GenericValue R = executeSelectInst(Src1, Src2, Src3, Ty);
  SetValue(&I, R, SF);
 }

-
 //===----------------------------------------------------------------------===//
 //                     Terminator Instruction Implementations
 //===----------------------------------------------------------------------===//
@@ -1793,10 +1804,115 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
  SetValue(&I, Dest, SF);
 }

+void Interpreter::visitInsertElementInst(InsertElementInst &I) {
+  ExecutionContext &SF = ECStack.back();
+  Type *Ty = I.getType();
+
+  if(!(Ty->isVectorTy()) )
+    llvm_unreachable("Unhandled dest type for insertelement instruction");
+
+  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+  GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+  GenericValue Dest;
+
+  Type *TyContained = Ty->getContainedType(0);
+
+  const unsigned indx = unsigned(Src3.IntVal.getZExtValue());
+  Dest.AggregateVal = Src1.AggregateVal;
+
+  if(Src1.AggregateVal.size() <= indx)
+      llvm_unreachable("Invalid index in insertelement instruction");
+  switch (TyContained->getTypeID()) {
+    default:
+      llvm_unreachable("Unhandled dest type for insertelement instruction");
+    case Type::IntegerTyID:
+      Dest.AggregateVal[indx].IntVal = Src2.IntVal;
+      break;
+    case Type::FloatTyID:
+      Dest.AggregateVal[indx].FloatVal = Src2.FloatVal;
+      break;
+    case Type::DoubleTyID:
+      Dest.AggregateVal[indx].DoubleVal = Src2.DoubleVal;
+      break;
+  }
+  SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
+  ExecutionContext &SF = ECStack.back();
+
+  Type *Ty = I.getType();
+  if(!(Ty->isVectorTy()))
+    llvm_unreachable("Unhandled dest type for shufflevector instruction");
+
+  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+  GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+  GenericValue Dest;
+
+  // There is no need to check types of src1 and src2, because the compiled
+  // bytecode can't contain different types for src1 and src2 for a
+  // shufflevector instruction.
+
+  Type *TyContained = Ty->getContainedType(0);
+  unsigned src1Size = (unsigned)Src1.AggregateVal.size();
+  unsigned src2Size = (unsigned)Src2.AggregateVal.size();
+  unsigned src3Size = (unsigned)Src3.AggregateVal.size();
+
+  Dest.AggregateVal.resize(src3Size);
+
+  switch (TyContained->getTypeID()) {
+    default:
+      llvm_unreachable("Unhandled dest type for insertelement instruction");
+      break;
+    case Type::IntegerTyID:
+      for( unsigned i=0; i<src3Size; i++) {
+        unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+        if(j < src1Size)
+          Dest.AggregateVal[i].IntVal = Src1.AggregateVal[j].IntVal;
+        else if(j < src1Size + src2Size)
+          Dest.AggregateVal[i].IntVal = Src2.AggregateVal[j-src1Size].IntVal;
+        else
+          // The selector may not be greater than sum of lengths of first and
+          // second operands and llasm should not allow situation like
+          // %tmp = shufflevector <2 x i32> <i32 3, i32 4>, <2 x i32> undef,
+          //                      <2 x i32> < i32 0, i32 5 >,
+          // where i32 5 is invalid, but let it be additional check here:
+          llvm_unreachable("Invalid mask in shufflevector instruction");
+      }
+      break;
+    case Type::FloatTyID:
+      for( unsigned i=0; i<src3Size; i++) {
+        unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+        if(j < src1Size)
+          Dest.AggregateVal[i].FloatVal = Src1.AggregateVal[j].FloatVal;
+        else if(j < src1Size + src2Size)
+          Dest.AggregateVal[i].FloatVal = Src2.AggregateVal[j-src1Size].FloatVal;
+        else
+          llvm_unreachable("Invalid mask in shufflevector instruction");
+        }
+      break;
+    case Type::DoubleTyID:
+      for( unsigned i=0; i<src3Size; i++) {
+        unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+        if(j < src1Size)
+          Dest.AggregateVal[i].DoubleVal = Src1.AggregateVal[j].DoubleVal;
+        else if(j < src1Size + src2Size)
+          Dest.AggregateVal[i].DoubleVal =
+            Src2.AggregateVal[j-src1Size].DoubleVal;
+        else
+          llvm_unreachable("Invalid mask in shufflevector instruction");
+      }
+      break;
+  }
+  SetValue(&I, Dest, SF);
+}
+
 GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
                                                ExecutionContext &SF) {
  switch (CE->getOpcode()) {
-  case Instruction::Trunc:   
+  case Instruction::Trunc:
      return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
  case Instruction::ZExt:
      return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
@@ -1832,7 +1948,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
  case Instruction::Select:
    return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
                             getOperandValue(CE->getOperand(1), SF),
-                             getOperandValue(CE->getOperand(2), SF));
+                             getOperandValue(CE->getOperand(2), SF),
+                             CE->getOperand(0)->getType());
  default :
    break;
  }
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -406,6 +406,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
      break;
    }
  }
+  return GV;
 }

 // int printf(const char *, ...) - a very rough implementation to make output
@@ -434,7 +435,7 @@ GenericValue lle_X_sscanf(FunctionType *FT,

  GenericValue GV;
  GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
-                        Args[5], Args[6], Args[7], Args[8], Args[9]));
+                    Args[5], Args[6], Args[7], Args[8], Args[9]));
  return GV;
 }

@@ -450,7 +451,7 @@ GenericValue lle_X_scanf(FunctionType *FT,

  GenericValue GV;
  GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
-                        Args[5], Args[6], Args[7], Args[8], Args[9]));
+                    Args[5], Args[6], Args[7], Args[8], Args[9]));
  return GV;
 }

@@ -470,6 +471,31 @@ GenericValue lle_X_fprintf(FunctionType *FT,
  return GV;
 }

+GenericValue lle_X_memset(FunctionType *FT,
+                           const std::vector<GenericValue> &Args) {
+  int val = (int)Args[1].IntVal.getSExtValue();
+  size_t len = (size_t)Args[2].IntVal.getZExtValue();
+  memset((void*)GVTOP(Args[0]),val, len);
+  // llvm.memset.* returns void, lle_X_* returns GenericValue,
+  // so here we return GenericValue with IntVal set to zero
+  GenericValue GV;
+  GV.IntVal = 0;
+  return GV;
+}
+
+GenericValue lle_X_memcpy(FunctionType *FT,
+                          const std::vector<GenericValue> &Args) {
+
+  memcpy(GVTOP(Args[0]), GVTOP(Args[1]),
+         (size_t)(Args[2].IntVal.getLimitedValue()));
+
+  // llvm.mecpy* returns void, lle_X_* returns GenericValue,
+  // so here we return GenericValue with IntVal set to zero
+  GenericValue GV;
+  GV.IntVal = 0;
+  return GV;
+}
+
 void Interpreter::initializeExternalFunctions() {
  sys::ScopedLock Writer(*FunctionsLock);
  FuncNames["lle_X_atexit"]       = lle_X_atexit;
@@ -481,4 +507,6 @@ void Interpreter::initializeExternalFunctions() {
  FuncNames["lle_X_sscanf"]       = lle_X_sscanf;
  FuncNames["lle_X_scanf"]        = lle_X_scanf;
  FuncNames["lle_X_fprintf"]      = lle_X_fprintf;
+  FuncNames["lle_X_memset"]       = lle_X_memset;
+  FuncNames["lle_X_memcpy"]       = lle_X_memcpy;
 }
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -179,6 +179,9 @@ public:

  void visitVAArgInst(VAArgInst &I);
  void visitExtractElementInst(ExtractElementInst &I);
+  void visitInsertElementInst(InsertElementInst &I);
+  void visitShuffleVectorInst(ShuffleVectorInst &I);
+
  void visitInstruction(Instruction &I) {
    errs() << I << "\n";
    llvm_unreachable("Instruction not interpretable yet!");
--- a/test/ExecutionEngine/test-interp-vec-insertelement.ll
+++ b/test/ExecutionEngine/test-interp-vec-insertelement.ll
@@ -0,0 +1,41 @@
+ ; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+ %v0 = insertelement <2 x i8> zeroinitializer, i8 1, i32 1
+ %v1 = insertelement <3 x i8> zeroinitializer, i8 2, i32 2
+ %v2 = insertelement <4 x i8> zeroinitializer, i8 3, i32 3
+ %v3 = insertelement <8 x i8> zeroinitializer, i8 4, i32 4
+ %v4 = insertelement <16 x i8> zeroinitializer, i8 5, i32 7
+
+ %v5 = insertelement <2 x i16> zeroinitializer, i16 1, i32 1
+ %v6 = insertelement <3 x i16> zeroinitializer, i16 2, i32 2
+ %v7 = insertelement <4 x i16> zeroinitializer, i16 3, i32 3
+ %v8 = insertelement <8 x i16> zeroinitializer, i16 4, i32 4
+ %v9 = insertelement <16 x i16> zeroinitializer, i16 5, i32 7
+
+ %v10 = insertelement <2 x i32> zeroinitializer, i32 1, i32 1
+ %v11 = insertelement <3 x i32> zeroinitializer, i32 2, i32 2
+ %v12 = insertelement <4 x i32> zeroinitializer, i32 3, i32 3
+ %v13 = insertelement <8 x i32> zeroinitializer, i32 4, i32 4
+ %v14 = insertelement <16 x i32> zeroinitializer, i32 5, i32 7
+
+ %v15 = insertelement <2 x i64> zeroinitializer, i64 1, i32 1
+ %v16 = insertelement <3 x i64> zeroinitializer, i64 2, i32 2
+ %v17 = insertelement <4 x i64> zeroinitializer, i64 3, i32 3
+ %v18 = insertelement <8 x i64> zeroinitializer, i64 4, i32 4
+ %v19 = insertelement <16 x i64> zeroinitializer, i64 5, i32 7
+
+ %v20 = insertelement <2 x float> zeroinitializer, float 1.0, i32 1
+ %v21 = insertelement <3 x float> zeroinitializer, float 2.0, i32 2
+ %v22 = insertelement <4 x float> zeroinitializer, float 3.0, i32 3
+ %v23 = insertelement <8 x float> zeroinitializer, float 4.0, i32 4
+ %v24 = insertelement <16 x float> zeroinitializer, float 5.0, i32 7
+
+ %v25 = insertelement <2 x double> zeroinitializer, double 1.0, i32 1
+ %v26 = insertelement <3 x double> zeroinitializer, double 2.0, i32 2
+ %v27 = insertelement <4 x double> zeroinitializer, double 3.0, i32 3
+ %v28 = insertelement <8 x double> zeroinitializer, double 4.0, i32 4
+ %v29 = insertelement <16 x double> zeroinitializer, double 5.0, i32 7
+
+ ret i32 0
+}
--- a/test/ExecutionEngine/test-interp-vec-select.ll
+++ b/test/ExecutionEngine/test-interp-vec-select.ll
@@ -0,0 +1,118 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+
+  ; Vector values
+  %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1>
+  %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2>
+  %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3>
+  %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+  %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+  %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1>
+  %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2>
+  %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3>
+  %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+
+  %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1>
+  %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2>
+  %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3>
+  %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %a2_i64 = add <2 x i64> zeroinitializer, <i64 0, i64 1>
+  %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2>
+  %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3>
+  %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+
+  %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0>
+  %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0>
+  %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0>
+  %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>
+  %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>
+
+  %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0>
+  %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0>
+  %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0>
+  %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>
+  %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>
+
+  %b2_i8  = sub <2 x i8> zeroinitializer, %a2_i8
+  %b3_i8  = sub <3 x i8> zeroinitializer, %a3_i8
+  %b4_i8  = sub <4 x i8> zeroinitializer, %a4_i8
+  %b8_i8  = sub <8 x i8> zeroinitializer, %a8_i8
+  %b16_i8 = sub <16 x i8> zeroinitializer, %a16_i8
+
+  %b2_i16  = sub <2 x i16> zeroinitializer, %a2_i16
+  %b3_i16  = sub <3 x i16> zeroinitializer, %a3_i16
+  %b4_i16  = sub <4 x i16> zeroinitializer, %a4_i16
+  %b8_i16  = sub <8 x i16> zeroinitializer, %a8_i16
+  %b16_i16 = sub <16 x i16> zeroinitializer, %a16_i16
+
+  %b2_i32  = sub <2 x i32> zeroinitializer, %a2_i32
+  %b3_i32  = sub <3 x i32> zeroinitializer, %a3_i32
+  %b4_i32  = sub <4 x i32> zeroinitializer, %a4_i32
+  %b8_i32  = sub <8 x i32> zeroinitializer, %a8_i32
+  %b16_i32 = sub <16 x i32> zeroinitializer, %a16_i32
+
+  %b2_i64  = sub <2 x i64> zeroinitializer, %a2_i64
+  %b3_i64  = sub <3 x i64> zeroinitializer, %a3_i64
+  %b4_i64  = sub <4 x i64> zeroinitializer, %a4_i64
+  %b8_i64  = sub <8 x i64> zeroinitializer, %a8_i64
+  %b16_i64 = sub <16 x i64> zeroinitializer, %a16_i64
+
+  %b2_float  = fsub <2 x float> zeroinitializer, %a2_float
+  %b3_float  = fsub <3 x float> zeroinitializer, %a3_float
+  %b4_float  = fsub <4 x float> zeroinitializer, %a4_float
+  %b8_float  = fsub <8 x float> zeroinitializer, %a8_float
+  %b16_float = fsub <16 x float> zeroinitializer, %a16_float
+
+  %b2_double  = fsub <2 x double> zeroinitializer, %a2_double
+  %b3_double  = fsub <3 x double> zeroinitializer, %a3_double
+  %b4_double  = fsub <4 x double> zeroinitializer, %a4_double
+  %b8_double  = fsub <8 x double> zeroinitializer, %a8_double
+  %b16_double = fsub <16 x double> zeroinitializer, %a16_double
+
+
+
+  %v0 = select <2 x i1> <i1 true, i1 false>, <2 x i8> %a2_i8, <2 x i8> %b2_i8
+  %v1 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i8> %a3_i8, <3 x i8> %b3_i8
+  %v2 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> %a4_i8, <4 x i8> %b4_i8
+  %v3 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i8> %a8_i8, <8 x i8> %b8_i8
+  %v4 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> %a16_i8, <16 x i8> %b16_i8
+
+  %v5 = select <2 x i1> <i1 true, i1 false>, <2 x i16> %a2_i16, <2 x i16> %b2_i16
+  %v6 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> %a3_i16, <3 x i16> %b3_i16
+  %v7 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i16> %a4_i16, <4 x i16> %b4_i16
+  %v8 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a8_i16, <8 x i16> %b8_i16
+  %v9 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i16> %a16_i16, <16 x i16> %b16_i16
+
+  %v10 = select <2 x i1> <i1 true, i1 false>, <2 x i32> %a2_i32, <2 x i32> %b2_i32
+  %v11 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i32> %a3_i32, <3 x i32> %b3_i32
+  %v12 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a4_i32, <4 x i32> %b4_i32
+  %v13 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a8_i32, <8 x i32> %b8_i32
+  %v14 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i32> %a16_i32, <16 x i32> %b16_i32
+
+  %v15 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a2_i64, <2 x i64> %b2_i64
+  %v16 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i64> %a3_i64, <3 x i64> %b3_i64
+  %v17 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %a4_i64, <4 x i64> %b4_i64
+  %v18 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i64> %a8_i64, <8 x i64> %b8_i64
+  %v19 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i64> %a16_i64, <16 x i64> %b16_i64
+
+  %v20 = select <2 x i1> <i1 true, i1 false>, <2 x float> %a2_float, <2 x float> %b2_float
+  %v21 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x float> %a3_float, <3 x float> %b3_float
+  %v22 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a4_float, <4 x float> %b4_float
+  %v23 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x float> %a8_float, <8 x float> %b8_float
+  %v24 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x float> %a16_float, <16 x float> %b16_float
+
+  %v25 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a2_double, <2 x double> %b2_double
+  %v26 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x double> %a3_double, <3 x double> %b3_double
+  %v27 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a4_double, <4 x double> %b4_double
+  %v28 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x double> %a8_double, <8 x double> %b8_double
+  %v29 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x double> %a16_double, <16 x double> %b16_double
+
+
+  ret i32 0
+}
--- a/test/ExecutionEngine/test-interp-vec-shuffle.ll
+++ b/test/ExecutionEngine/test-interp-vec-shuffle.ll
@@ -0,0 +1,81 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+
+  ; Vector values
+  %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1>
+  %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2>
+  %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3>
+  %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+  %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+  %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1>
+  %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2>
+  %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3>
+  %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+
+  %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1>
+  %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2>
+  %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3>
+  %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %a2_i64 = add <2 x i64> zeroinitializer, <i64 0, i64 1>
+  %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2>
+  %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3>
+  %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+
+  %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0>
+  %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0>
+  %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0>
+  %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>
+  %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>
+
+  %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0>
+  %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0>
+  %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0>
+  %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>
+  %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>
+
+
+  %v0 = shufflevector <2 x i8> %a2_i8, <2 x i8>undef, <2 x i32> <i32 1, i32 0>
+  %v1 = shufflevector <3 x i8> %a3_i8, <3 x i8>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v2 = shufflevector <4 x i8> %a4_i8, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v3 = shufflevector <8 x i8> %a8_i8, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v4 = shufflevector <16 x i8> %a16_i8, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v5 = shufflevector <2 x i16> %a2_i16, <2 x i16>undef, <2 x i32> <i32 1, i32 0>
+  %v6 = shufflevector <3 x i16> %a3_i16, <3 x i16>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v7 = shufflevector <4 x i16> %a4_i16, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8 = shufflevector <8 x i16> %a8_i16, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v9 = shufflevector <16 x i16> %a16_i16, <16 x i16>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v10 = shufflevector <2 x i32> %a2_i32, <2 x i32>undef, <2 x i32> <i32 1, i32 0>
+  %v11 = shufflevector <3 x i32> %a3_i32, <3 x i32>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v12 = shufflevector <4 x i32> %a4_i32, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v13 = shufflevector <8 x i32> %a8_i32, <8 x i32>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v14 = shufflevector <16 x i32> %a16_i32, <16 x i32>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v15 = shufflevector <2 x i64> %a2_i64, <2 x i64>undef, <2 x i32> <i32 1, i32 0>
+  %v16 = shufflevector <3 x i64> %a3_i64, <3 x i64>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v17 = shufflevector <4 x i64> %a4_i64, <4 x i64>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v18 = shufflevector <8 x i64> %a8_i64, <8 x i64>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v19 = shufflevector <16 x i64> %a16_i64, <16 x i64>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v20 = shufflevector <2 x float> %a2_float, <2 x float>undef, <2 x i32> <i32 1, i32 0>
+  %v21 = shufflevector <3 x float> %a3_float, <3 x float>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v22 = shufflevector <4 x float> %a4_float, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v23 = shufflevector <8 x float> %a8_float, <8 x float>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v24 = shufflevector <16 x float> %a16_float, <16 x float>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v25 = shufflevector <2 x double> %a2_double, <2 x double>undef, <2 x i32> <i32 1, i32 0>
+  %v26 = shufflevector <3 x double> %a3_double, <3 x double>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v27 = shufflevector <4 x double> %a4_double, <4 x double>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v28 = shufflevector <8 x double> %a8_double, <8 x double>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v29 = shufflevector <16 x double> %a16_double, <16 x double>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ret i32 0
+}
+