Add support for additional vector instructions in the interpreter.

patch by Veselov, Yuri <Yuri.Veselov@intel.com>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179409 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-20 00:20:11 +00:00 · 2013-04-12 20:45:20 +00:00
parent 3f42936af8
commit affe889d08
6 changed files with 450 additions and 44 deletions
@@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
      Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
      break;

+#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) /* TY is unused */     \
+  case Type::VectorTyID: { /* one i1 result per vector lane */      \
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());    \
+    Dest.AggregateVal.resize( Src1.AggregateVal.size() );            \
+    for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++)             \
+      Dest.AggregateVal[_i].IntVal = APInt(1,                        \
+      Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\
+  } break;
+
 // Handle pointers specially because they must be compared with only as much
 // width as the host has.  We _do not_ want to be comparing 64 bit values when
 // running on a 32-bit target, otherwise the upper 32 bits might mess up
@@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(eq,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty);
    IMPLEMENT_POINTER_ICMP(==);
  default:
    dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
@@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(ne,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty);
    IMPLEMENT_POINTER_ICMP(!=);
  default:
    dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
@@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(ult,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty);
    IMPLEMENT_POINTER_ICMP(<);
  default:
    dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
@@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(slt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty);
    IMPLEMENT_POINTER_ICMP(<);
  default:
    dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
@@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty);
    IMPLEMENT_POINTER_ICMP(>);
  default:
    dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
@@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty);
    IMPLEMENT_POINTER_ICMP(>);
  default:
    dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
@@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(ule,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty);
    IMPLEMENT_POINTER_ICMP(<=);
  default:
    dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
@@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(sle,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty);
    IMPLEMENT_POINTER_ICMP(<=);
  default:
    dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
@@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(uge,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty);
    IMPLEMENT_POINTER_ICMP(>=);
  default:
    dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
@@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_INTEGER_ICMP(sge,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty);
    IMPLEMENT_POINTER_ICMP(>=);
  default:
    dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
@@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
     Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
     break

+#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) /* TY: Float or Double */   \
+  assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());     \
+  Dest.AggregateVal.resize( Src1.AggregateVal.size() );             \
+  for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++)              \
+    Dest.AggregateVal[_i].IntVal = APInt(1,                         \
+    Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\
+  break; /* leaves the switch case this expands in */
+
+#define IMPLEMENT_VECTOR_FCMP(OP) /* lane-wise OP on float/double */\
+  case Type::VectorTyID:                                            \
+    if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {      \
+      IMPLEMENT_VECTOR_FCMP_T(OP, Float);                           \
+    } else { /* non-float elements are treated as double */         \
+      IMPLEMENT_VECTOR_FCMP_T(OP, Double);                          \
+    }
+
 static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(==, Float);
    IMPLEMENT_FCMP(==, Double);
+    IMPLEMENT_VECTOR_FCMP(==);
  default:
    dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
    llvm_unreachable(0);
@@ -298,17 +334,62 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
  return Dest;
 }

+#define IMPLEMENT_SCALAR_NANS(TY, X,Y)                                      \
+  if (TY->isFloatTy()) { /* x != x holds only when x is NaN */              \
+    if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) {             \
+      Dest.IntVal = APInt(1,false); /* NaN operand: result is false */      \
+      return Dest;                                                          \
+    }                                                                       \
+  } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) {    \
+    Dest.IntVal = APInt(1,false); /* non-float TY: DoubleVal is tested */   \
+    return Dest; /* NOTE(review): vector TY also takes this branch */       \
+  }
+
+#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) /* per-lane NaN mask -> Dest */   \
+  assert(X.AggregateVal.size() == Y.AggregateVal.size());                   \
+  Dest.AggregateVal.resize( X.AggregateVal.size() );                        \
+  for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) {                       \
+    if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val ||         \
+        Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val)           \
+      Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); /* NaN in lane */       \
+    else  {                                                                 \
+      Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); /* no NaN */           \
+    }                                                                       \
+  }
+
+#define MASK_VECTOR_NANS(TY, X,Y, FLAG) /* no-op unless TY is a vector */   \
+  if (TY->isVectorTy()) {                                                   \
+    if (cast<VectorType>(TY)->getElementType()->isFloatTy()) {              \
+      MASK_VECTOR_NANS_T(X, Y, Float, FLAG)                                 \
+    } else { /* every non-float element type is handled as double */        \
+      MASK_VECTOR_NANS_T(X, Y, Double, FLAG)                                \
+    }                                                                       \
+  }
+
+// Ordered "not equal": a NaN operand makes the result (or that lane) false.
 static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
-                                   Type *Ty) {
+                                    Type *Ty)
+{
  GenericValue Dest;
+  // Scalar operands: return false right away when Src1 or Src2 is NaN.
+  IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2)
+  // Vector operands: fill Dest with a per-lane mask, false where a NaN occurs.
+  MASK_VECTOR_NANS(Ty, Src1, Src2, false)
+  GenericValue DestMask = Dest;
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(!=, Float);
    IMPLEMENT_FCMP(!=, Double);
-
-  default:
-    dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
-    llvm_unreachable(0);
+    IMPLEMENT_VECTOR_FCMP(!=);
+    default:
+      dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+      llvm_unreachable(0);
  }
+  // Vector case: force lanes that held a NaN (mask bit false) back to false.
+  if (Ty->isVectorTy())
+    for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+      if (DestMask.AggregateVal[_i].IntVal == false)
+        Dest.AggregateVal[_i].IntVal = APInt(1,false);
+
  return Dest;
 }

@@ -318,6 +399,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(<=, Float);
    IMPLEMENT_FCMP(<=, Double);
+    IMPLEMENT_VECTOR_FCMP(<=);
  default:
    dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
    llvm_unreachable(0);
@@ -331,6 +413,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(>=, Float);
    IMPLEMENT_FCMP(>=, Double);
+    IMPLEMENT_VECTOR_FCMP(>=);
  default:
    dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
    llvm_unreachable(0);
@@ -344,6 +427,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(<, Float);
    IMPLEMENT_FCMP(<, Double);
+    IMPLEMENT_VECTOR_FCMP(<);
  default:
    dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
    llvm_unreachable(0);
@@ -357,6 +441,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
  switch (Ty->getTypeID()) {
    IMPLEMENT_FCMP(>, Float);
    IMPLEMENT_FCMP(>, Double);
+    IMPLEMENT_VECTOR_FCMP(>);
  default:
    dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
    llvm_unreachable(0);
@@ -375,18 +460,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
    return Dest;                                                         \
  }

+#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, FUNC)                        \
+  if (TY->isVectorTy()) { /* Dest must already hold the NaN mask */      \
+    GenericValue DestMask = Dest;                                        \
+    Dest = FUNC(X, Y, TY); /* lane-wise compare via FUNC */              \
+    for( size_t _i=0; _i<X.AggregateVal.size(); _i++)                    \
+      if (DestMask.AggregateVal[_i].IntVal == true)                      \
+        Dest.AggregateVal[_i].IntVal = APInt(1,true); /* NaN => true */  \
+    return Dest;                                                         \
+  }

 static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)  // vectors: NaN lanes become true
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ)
  return executeFCMP_OEQ(Src1, Src2, Ty);
+
 }

 static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)  // vectors: NaN lanes become true
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE)
  return executeFCMP_ONE(Src1, Src2, Ty);
 }

@@ -394,6 +493,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE)
  return executeFCMP_OLE(Src1, Src2, Ty);
 }

@@ -401,6 +502,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE)
  return executeFCMP_OGE(Src1, Src2, Ty);
 }

@@ -408,6 +511,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
                                   Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT)
  return executeFCMP_OLT(Src1, Src2, Ty);
 }

@@ -415,33 +520,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
                                     Type *Ty) {
  GenericValue Dest;
  IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT)
  return executeFCMP_OGT(Src1, Src2, Ty);
 }

 static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
                                     Type *Ty) {
  GenericValue Dest;
-  if (Ty->isFloatTy())
+  if(Ty->isVectorTy()) { // lane-wise: true where neither operand is NaN
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+    if(cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+      for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+        Dest.AggregateVal[_i].IntVal = APInt(1,
+        ( (Src1.AggregateVal[_i].FloatVal ==
+        Src1.AggregateVal[_i].FloatVal) &&
+        (Src2.AggregateVal[_i].FloatVal ==
+        Src2.AggregateVal[_i].FloatVal)));
+    } else { // any non-float element type is read as DoubleVal
+      for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+        Dest.AggregateVal[_i].IntVal = APInt(1,
+        ( (Src1.AggregateVal[_i].DoubleVal ==
+        Src1.AggregateVal[_i].DoubleVal) &&
+        (Src2.AggregateVal[_i].DoubleVal ==
+        Src2.AggregateVal[_i].DoubleVal)));
+    }
+  } else if (Ty->isFloatTy())
    Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && 
                           Src2.FloatVal == Src2.FloatVal));
-  else
+  else {
    Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && 
                           Src2.DoubleVal == Src2.DoubleVal));
+  }
  return Dest;
 }

 static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
                                     Type *Ty) {
  GenericValue Dest;
-  if (Ty->isFloatTy())
+  if(Ty->isVectorTy()) { // lane-wise: true where either operand is NaN
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+    if(cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+      for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+        Dest.AggregateVal[_i].IntVal = APInt(1,
+        ( (Src1.AggregateVal[_i].FloatVal !=
+           Src1.AggregateVal[_i].FloatVal) ||
+          (Src2.AggregateVal[_i].FloatVal !=
+           Src2.AggregateVal[_i].FloatVal)));
+      } else { // any non-float element type is read as DoubleVal
+        for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+          Dest.AggregateVal[_i].IntVal = APInt(1,
+          ( (Src1.AggregateVal[_i].DoubleVal !=
+             Src1.AggregateVal[_i].DoubleVal) ||
+            (Src2.AggregateVal[_i].DoubleVal !=
+             Src2.AggregateVal[_i].DoubleVal)));
+      }
+  } else if (Ty->isFloatTy())
    Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || 
                           Src2.FloatVal != Src2.FloatVal));
-  else
+  else {
    Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || 
                           Src2.DoubleVal != Src2.DoubleVal));
+  }
  return Dest;
 }

+static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2,
+                                    const Type *Ty, const bool val) {
+  // FCMP_FALSE/FCMP_TRUE: every result bit (one per vector lane) equals val.
+  GenericValue Dest;
+  if (!Ty->isVectorTy()) {
+    Dest.IntVal = APInt(1, val);
+    return Dest;
+  }
+  assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+  Dest.AggregateVal.resize(Src1.AggregateVal.size());
+  for (size_t Idx = 0; Idx != Dest.AggregateVal.size(); ++Idx)
+    Dest.AggregateVal[Idx].IntVal = APInt(1, val);
+  return Dest;
+}
+
 void Interpreter::visitFCmpInst(FCmpInst &I) {
  ExecutionContext &SF = ECStack.back();
  Type *Ty    = I.getOperand(0)->getType();
@@ -450,8 +610,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
  GenericValue R;   // Result
  
  switch (I.getPredicate()) {
-  case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
-  case FCmpInst::FCMP_TRUE:  R.IntVal = APInt(1,true); break;
+  default:
+    dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+    llvm_unreachable(0);
+  break;
+  case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); 
+  break;
+  case FCmpInst::FCMP_TRUE:  R = executeFCMP_BOOL(Src1, Src2, Ty, true); 
+  break;
  case FCmpInst::FCMP_ORD:   R = executeFCMP_ORD(Src1, Src2, Ty); break;
  case FCmpInst::FCMP_UNO:   R = executeFCMP_UNO(Src1, Src2, Ty); break;
  case FCmpInst::FCMP_UEQ:   R = executeFCMP_UEQ(Src1, Src2, Ty); break;
@@ -466,9 +632,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
  case FCmpInst::FCMP_OLE:   R = executeFCMP_OLE(Src1, Src2, Ty); break;
  case FCmpInst::FCMP_UGE:   R = executeFCMP_UGE(Src1, Src2, Ty); break;
  case FCmpInst::FCMP_OGE:   R = executeFCMP_OGE(Src1, Src2, Ty); break;
-  default:
-    dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
-    llvm_unreachable(0);
  }
 
  SetValue(&I, R, SF);
@@ -502,16 +665,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
  case FCmpInst::FCMP_ULE:   return executeFCMP_ULE(Src1, Src2, Ty);
  case FCmpInst::FCMP_OGE:   return executeFCMP_OGE(Src1, Src2, Ty);
  case FCmpInst::FCMP_UGE:   return executeFCMP_UGE(Src1, Src2, Ty);
-  case FCmpInst::FCMP_FALSE: { 
-    GenericValue Result;
-    Result.IntVal = APInt(1, false);
-    return Result;
-  }
-  case FCmpInst::FCMP_TRUE: {
-    GenericValue Result;
-    Result.IntVal = APInt(1, true);
-    return Result;
-  }
+  case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false);
+  case FCmpInst::FCMP_TRUE:  return executeFCMP_BOOL(Src1, Src2, Ty, true);
  default:
    dbgs() << "Unhandled Cmp predicate\n";
    llvm_unreachable(0);
@@ -525,27 +680,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
  GenericValue R;   // Result

-  switch (I.getOpcode()) {
-  case Instruction::Add:   R.IntVal = Src1.IntVal + Src2.IntVal; break;
-  case Instruction::Sub:   R.IntVal = Src1.IntVal - Src2.IntVal; break;
-  case Instruction::Mul:   R.IntVal = Src1.IntVal * Src2.IntVal; break;
-  case Instruction::FAdd:  executeFAddInst(R, Src1, Src2, Ty); break;
-  case Instruction::FSub:  executeFSubInst(R, Src1, Src2, Ty); break;
-  case Instruction::FMul:  executeFMulInst(R, Src1, Src2, Ty); break;
-  case Instruction::FDiv:  executeFDivInst(R, Src1, Src2, Ty); break;
-  case Instruction::FRem:  executeFRemInst(R, Src1, Src2, Ty); break;
-  case Instruction::UDiv:  R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
-  case Instruction::SDiv:  R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
-  case Instruction::URem:  R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
-  case Instruction::SRem:  R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
-  case Instruction::And:   R.IntVal = Src1.IntVal & Src2.IntVal; break;
-  case Instruction::Or:    R.IntVal = Src1.IntVal | Src2.IntVal; break;
-  case Instruction::Xor:   R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
-  default:
-    dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
-    llvm_unreachable(0);
-  }
+  // First process vector operation
+  if (Ty->isVectorTy()) {
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    R.AggregateVal.resize(Src1.AggregateVal.size());

+    // Macro to apply the binary operator 'OP' element-wise to integer vectors.
+#define INTEGER_VECTOR_OPERATION(OP)                               \
+    for (unsigned i = 0; i < R.AggregateVal.size(); ++i)           \
+      R.AggregateVal[i].IntVal =                                   \
+      Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal;
+
+    // Same, for udiv/sdiv/urem/srem, which are APInt member functions rather
+    // than operators.
+#define INTEGER_VECTOR_FUNCTION(OP)                                \
+    for (unsigned i = 0; i < R.AggregateVal.size(); ++i)           \
+      R.AggregateVal[i].IntVal =                                   \
+      Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal);
+
+    // Macro to apply the binary operator 'OP' element-wise to vectors whose
+    // elements are stored in field TY (FloatVal or DoubleVal).
+#define FLOAT_VECTOR_FUNCTION(OP, TY)                               \
+      for (unsigned i = 0; i < R.AggregateVal.size(); ++i)          \
+        R.AggregateVal[i].TY =                                      \
+        Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY;
+
+    // Macro that dispatches on the vector element type (float or double) and
+    // applies 'OP' element-wise; any other element type hits llvm_unreachable.
+#define FLOAT_VECTOR_OP(OP) {                                         \
+  if (cast<VectorType>(Ty)->getElementType()->isFloatTy())            \
+    FLOAT_VECTOR_FUNCTION(OP, FloatVal)                               \
+  else {                                                              \
+    if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())         \
+      FLOAT_VECTOR_FUNCTION(OP, DoubleVal)                            \
+    else {                                                            \
+      dbgs() << "Unhandled type for " #OP " instruction: " << *Ty << "\n"; \
+      llvm_unreachable(0);                                            \
+    }                                                                 \
+  }                                                                   \
+}
+
+    switch(I.getOpcode()){
+    default:
+      dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+      llvm_unreachable(0);
+      break;
+    case Instruction::Add:   INTEGER_VECTOR_OPERATION(+) break;
+    case Instruction::Sub:   INTEGER_VECTOR_OPERATION(-) break;
+    case Instruction::Mul:   INTEGER_VECTOR_OPERATION(*) break;
+    case Instruction::UDiv:  INTEGER_VECTOR_FUNCTION(udiv) break;
+    case Instruction::SDiv:  INTEGER_VECTOR_FUNCTION(sdiv) break;
+    case Instruction::URem:  INTEGER_VECTOR_FUNCTION(urem) break;
+    case Instruction::SRem:  INTEGER_VECTOR_FUNCTION(srem) break;
+    case Instruction::And:   INTEGER_VECTOR_OPERATION(&) break;
+    case Instruction::Or:    INTEGER_VECTOR_OPERATION(|) break;
+    case Instruction::Xor:   INTEGER_VECTOR_OPERATION(^) break;
+    case Instruction::FAdd:  FLOAT_VECTOR_OP(+) break;
+    case Instruction::FSub:  FLOAT_VECTOR_OP(-) break;
+    case Instruction::FMul:  FLOAT_VECTOR_OP(*) break;
+    case Instruction::FDiv:  FLOAT_VECTOR_OP(/) break;
+    case Instruction::FRem:
+      if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy())
+        for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+          R.AggregateVal[i].FloatVal = 
+          fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
+      else {
+        if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+          for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+            R.AggregateVal[i].DoubleVal = 
+            fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
+        else {
+          dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+          llvm_unreachable(0);
+        }
+      }
+      break;
+    }
+  } else {
+    switch (I.getOpcode()) {
+    default:
+      dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+      llvm_unreachable(0);
+      break;
+    case Instruction::Add:   R.IntVal = Src1.IntVal + Src2.IntVal; break;
+    case Instruction::Sub:   R.IntVal = Src1.IntVal - Src2.IntVal; break;
+    case Instruction::Mul:   R.IntVal = Src1.IntVal * Src2.IntVal; break;
+    case Instruction::FAdd:  executeFAddInst(R, Src1, Src2, Ty); break;
+    case Instruction::FSub:  executeFSubInst(R, Src1, Src2, Ty); break;
+    case Instruction::FMul:  executeFMulInst(R, Src1, Src2, Ty); break;
+    case Instruction::FDiv:  executeFDivInst(R, Src1, Src2, Ty); break;
+    case Instruction::FRem:  executeFRemInst(R, Src1, Src2, Ty); break;
+    case Instruction::UDiv:  R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+    case Instruction::SDiv:  R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+    case Instruction::URem:  R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+    case Instruction::SRem:  R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+    case Instruction::And:   R.IntVal = Src1.IntVal & Src2.IntVal; break;
+    case Instruction::Or:    R.IntVal = Src1.IntVal | Src2.IntVal; break;
+    case Instruction::Xor:   R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+    }
+  }
  SetValue(&I, R, SF);
 }

@@ -0,0 +1,20 @@
+; RUN: %lli %s > /dev/null
+; Smoke test: vector fadd/fsub/fmul/fdiv/frem on float and double; results unused.
+
+define i32 @main() {
+
+    %A_float = fadd <4 x float> <float 0.0, float 11.0, float 22.0, float 33.0>, <float 44.0, float 55.0, float 66.0, float 77.0>
+    %B_float = fsub <4 x float> %A_float, <float 88.0, float 99.0, float 100.0, float 111.0>
+    %C_float = fmul <4 x float> %B_float, %B_float
+    %D_float = fdiv <4 x float> %C_float, %B_float
+    %E_float = frem <4 x float> %D_float, %A_float
+
+
+    %A_double = fadd <3 x double> <double 0.0, double 111.0, double 222.0>, <double 444.0, double 555.0, double 665.0>
+    %B_double = fsub <3 x double> %A_double, <double 888.0, double 999.0, double 1001.0>
+    %C_double = fmul <3 x double> %B_double, %B_double
+    %D_double = fdiv <3 x double> %C_double, %B_double
+    %E_double = frem <3 x double> %D_double, %A_double
+
+    ret i32 0
+}
@@ -0,0 +1,37 @@
+; RUN: %lli %s > /dev/null
+; Smoke test: vector add/sub/mul/sdiv/srem/udiv/urem on i8, i16, i32 and i64.
+define i32 @main() {
+    %A_i8 = add <5 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4>, <i8 12, i8 34, i8 56, i8 78, i8 89>
+    %B_i8 = sub <5 x i8> %A_i8, <i8 11, i8 22, i8 33, i8 44, i8 55>
+    %C_i8 = mul <5 x i8> %B_i8, %B_i8
+    %D_i8 = sdiv <5 x i8> %C_i8, %C_i8
+    %E_i8 = srem <5 x i8> %D_i8, %D_i8
+    %F_i8 = udiv <5 x i8> <i8 5, i8 6, i8 7, i8 8, i8 9>, <i8 6, i8 5, i8 4, i8 3, i8 2>
+    %G_i8 = urem <5 x i8> <i8 6, i8 7, i8 8, i8 9, i8 10>, <i8 5, i8 4, i8 2, i8 2, i8 1>
+
+    %A_i16 = add <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <i16 123, i16 345, i16 567, i16 789>
+    %B_i16 = sub <4 x i16> %A_i16, <i16 111, i16 222, i16 333, i16 444>
+    %C_i16 = mul <4 x i16> %B_i16, %B_i16
+    %D_i16 = sdiv <4 x i16> %C_i16, %C_i16
+    %E_i16 = srem <4 x i16> %D_i16, %D_i16
+    %F_i16 = udiv <4 x i16> <i16 5, i16 6, i16 7, i16 8>, <i16 6, i16 5, i16 4, i16 3>
+    %G_i16 = urem <4 x i16> <i16 6, i16 7, i16 8, i16 9>, <i16 5, i16 4, i16 3, i16 2>
+
+    %A_i32 = add <3 x i32> <i32 0, i32 1, i32 2>, <i32 1234, i32 3456, i32 5678>
+    %B_i32 = sub <3 x i32> %A_i32, <i32 1111, i32 2222, i32 3333>
+    %C_i32 = mul <3 x i32> %B_i32, %B_i32
+    %D_i32 = sdiv <3 x i32> %C_i32, %C_i32
+    %E_i32 = srem <3 x i32> %D_i32, %D_i32
+    %F_i32 = udiv <3 x i32> <i32 5, i32 6, i32 7>, <i32 6, i32 5, i32 4>
+    %G_i32 = urem <3 x i32> <i32 6, i32 7, i32 8>, <i32 5, i32 4, i32 3>
+
+    %A_i64 = add <2 x i64> <i64 0, i64 1>, <i64 12455, i64 34567>
+    %B_i64 = sub <2 x i64> %A_i64, <i64 11111, i64 22222>
+    %C_i64 = mul <2 x i64> %B_i64, %B_i64
+    %D_i64 = sdiv <2 x i64> %C_i64, %C_i64
+    %E_i64 = srem <2 x i64> %D_i64, %D_i64
+    %F_i64 = udiv <2 x i64> <i64 5, i64 6>, <i64 6, i64 5>
+    %G_i64 = urem <2 x i64> <i64 6, i64 7>, <i64 5, i64 3>
+ 
+    ret i32 0
+}
@@ -0,0 +1,22 @@
+; RUN: %lli %s > /dev/null
+; Smoke test: vector and/or/xor on i8, i16, i32 and i64 elements; results unused.
+define i32 @main() {
+    %A_i8 = and <5 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4>, <i8 8, i8 8, i8 8, i8 8, i8 8>
+    %B_i8 = or <5 x i8> %A_i8, <i8 7, i8 7, i8 7, i8 7, i8 7>
+    %C_i8 = xor <5 x i8> %B_i8, %A_i8
+
+    %A_i16 = and <4 x i16> <i16 4, i16 4, i16 4, i16 4>, <i16 8, i16 8, i16 8, i16 8>
+    %B_i16 = or <4 x i16> %A_i16, <i16 7, i16 7, i16 7, i16 7>
+    %C_i16 = xor <4 x i16> %B_i16, %A_i16
+
+    %A_i32 = and <3 x i32> <i32 4, i32 4, i32 4>, <i32 8, i32 8, i32 8>
+    %B_i32 = or <3 x i32> %A_i32, <i32 7, i32 7, i32 7>
+    %C_i32 = xor <3 x i32> %B_i32, %A_i32
+
+    %A_i64 = and <2 x i64> <i64 4, i64 4>, <i64 8, i64 8>
+    %B_i64 = or <2 x i64> %A_i64, <i64 7, i64 7>
+    %C_i64 = xor <2 x i64> %B_i64, %A_i64
+
+    ret i32 0
+}
+
@@ -0,0 +1,25 @@
+; RUN: %lli %s > /dev/null
+; Smoke test: vector fcmp oeq/oge/ogt/ole/olt/une on float and double lanes.
+define i32 @main() {
+    %double1 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+    %double2 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+    %float1 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+    %float2 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+    %test49 = fcmp oeq <3 x float> %float1, %float2
+    %test50 = fcmp oge <3 x float> %float1, %float2
+    %test51 = fcmp ogt <3 x float> %float1, %float2
+    %test52 = fcmp ole <3 x float> %float1, %float2
+    %test53 = fcmp olt <3 x float> %float1, %float2
+    %test54 = fcmp une <3 x float> %float1, %float2
+
+    %test55 = fcmp oeq <2 x double> %double1, %double2
+    %test56 = fcmp oge <2 x double> %double1, %double2
+    %test57 = fcmp ogt <2 x double> %double1, %double2
+    %test58 = fcmp ole <2 x double> %double1, %double2
+    %test59 = fcmp olt <2 x double> %double1, %double2
+    %test60 = fcmp une <2 x double> %double1, %double2
+
+    ret i32 0
+}
+
+
@@ -0,0 +1,69 @@
+; RUN: %lli %s > /dev/null
+; Smoke test: vector icmp eq/ne plus signed and unsigned orderings on i8..i64.
+define i32 @main() {
+    %int1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %int2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %long1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %long2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %sbyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %sbyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %short1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %short2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %ubyte1 = add <5 x i8>  <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %ubyte2 = add <5 x i8>  <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %uint1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %uint2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %ulong1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %ulong2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %ushort1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %ushort2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %test1 = icmp eq <5 x i8> %ubyte1, %ubyte2
+    %test2 = icmp uge <5 x i8> %ubyte1, %ubyte2
+    %test3 = icmp ugt <5 x i8> %ubyte1, %ubyte2
+    %test4 = icmp ule <5 x i8> %ubyte1, %ubyte2
+    %test5 = icmp ult <5 x i8> %ubyte1, %ubyte2
+    %test6 = icmp ne <5 x i8> %ubyte1, %ubyte2
+    %test7 = icmp eq <4 x i16> %ushort1, %ushort2
+    %test8 = icmp uge <4 x i16> %ushort1, %ushort2
+    %test9 = icmp ugt <4 x i16> %ushort1, %ushort2
+    %test10 = icmp ule <4 x i16> %ushort1, %ushort2
+    %test11 = icmp ult <4 x i16> %ushort1, %ushort2
+    %test12 = icmp ne <4 x i16> %ushort1, %ushort2 
+    %test13 = icmp eq <3 x i32> %uint1, %uint2
+    %test14 = icmp uge <3 x i32> %uint1, %uint2
+    %test15 = icmp ugt <3 x i32> %uint1, %uint2
+    %test16 = icmp ule <3 x i32> %uint1, %uint2
+    %test17 = icmp ult <3 x i32> %uint1, %uint2
+    %test18 = icmp ne <3 x i32> %uint1, %uint2
+    %test19 = icmp eq <2 x i64> %ulong1, %ulong2
+    %test20 = icmp uge <2 x i64> %ulong1, %ulong2
+    %test21 = icmp ugt <2 x i64> %ulong1, %ulong2
+    %test22 = icmp ule <2 x i64> %ulong1, %ulong2
+    %test23 = icmp ult <2 x i64> %ulong1, %ulong2
+    %test24 = icmp ne <2 x i64> %ulong1, %ulong2
+    %test25 = icmp eq <5 x i8> %sbyte1, %sbyte2
+    %test26 = icmp sge <5 x i8> %sbyte1, %sbyte2
+    %test27 = icmp sgt <5 x i8> %sbyte1, %sbyte2
+    %test28 = icmp sle <5 x i8> %sbyte1, %sbyte2
+    %test29 = icmp slt <5 x i8> %sbyte1, %sbyte2
+    %test30 = icmp ne <5 x i8> %sbyte1, %sbyte2
+    %test31 = icmp eq <4 x i16> %short1, %short2
+    %test32 = icmp sge <4 x i16> %short1, %short2
+    %test33 = icmp sgt <4 x i16> %short1, %short2
+    %test34 = icmp sle <4 x i16> %short1, %short2
+    %test35 = icmp slt <4 x i16> %short1, %short2
+    %test36 = icmp ne <4 x i16> %short1, %short2
+    %test37 = icmp eq <3 x i32> %int1, %int2
+    %test38 = icmp sge <3 x i32> %int1, %int2
+    %test39 = icmp sgt <3 x i32> %int1, %int2
+    %test40 = icmp sle <3 x i32> %int1, %int2
+    %test41 = icmp slt <3 x i32> %int1, %int2
+    %test42 = icmp ne <3 x i32> %int1, %int2
+    %test43 = icmp eq <2 x i64> %long1, %long2
+    %test44 = icmp sge <2 x i64> %long1, %long2
+    %test45 = icmp sgt <2 x i64> %long1, %long2
+    %test46 = icmp sle <2 x i64> %long1, %long2
+    %test47 = icmp slt <2 x i64> %long1, %long2
+    %test48 = icmp ne <2 x i64> %long1, %long2
+    ret i32 0
+}