From da521cc1cc733ee1c27b00e4c0e365c8b702e2e0 Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Mon, 23 Sep 2013 12:02:46 +0000
Subject: [PATCH] [mips][msa] Implemented build_vector using ldi, fill, and
 custom SelectionDAG nodes (VSPLAT and VSPLATD)

Note: There's a later patch on my branch that re-implements this to select
build_vector without the custom SelectionDAG nodes. The future patch avoids
the constant-folding problems stemming from the custom node (i.e. it doesn't
need to re-implement all the DAG combines related to BUILD_VECTOR).

Changes to MIPS specific SelectionDAG nodes:
* Added VSPLAT
    This is a special case of BUILD_VECTOR that covers the case where the
    BUILD_VECTOR is a splat operation.
* Added VSPLATD
    This is a special case of VSPLAT that handles the cases when v2i64 is legal


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191191 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.cpp          |   2 +
 lib/Target/Mips/MipsISelLowering.h            |   8 +-
 lib/Target/Mips/MipsMSAInstrInfo.td           |  26 ++--
 lib/Target/Mips/MipsSEISelLowering.cpp        | 109 ++++++++++++++++
 lib/Target/Mips/MipsSEISelLowering.h          |   5 +
 test/CodeGen/Mips/msa/basic_operations.ll     | 120 ++++++++++++++++++
 .../Mips/msa/basic_operations_float.ll        |  57 +++++++++
 7 files changed, 316 insertions(+), 11 deletions(-)
 create mode 100644 test/CodeGen/Mips/msa/basic_operations.ll
 create mode 100644 test/CodeGen/Mips/msa/basic_operations_float.ll

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 220955ea662..21c5edb39cb 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -212,6 +212,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::VANY_ZERO:         return "MipsISD::VANY_ZERO";
   case MipsISD::VALL_NONZERO:      return "MipsISD::VALL_NONZERO";
   case MipsISD::VANY_NONZERO:      return "MipsISD::VANY_NONZERO";
+  case MipsISD::VSPLAT:            return "MipsISD::VSPLAT";
+  case MipsISD::VSPLATD:           return "MipsISD::VSPLATD";
   default:                         return NULL;
   }
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 85aa162fb41..57b5603ac2d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -152,12 +152,18 @@ namespace llvm {
       SETCC_DSP,
       SELECT_CC_DSP,
 
-      // Vector comparisons
+      // Vector comparisons.
       VALL_ZERO,
       VANY_ZERO,
       VALL_NONZERO,
       VANY_NONZERO,
 
+      // Special case of BUILD_VECTOR where all elements are the same.
+      VSPLAT,
+      // Special case of VSPLAT where the result is v2i64, the operand is
+      // constant, and the operand fits in a signed 10-bit value.
+      VSPLATD,
+
       // Load/Store Left/Right nodes.
       LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LWR,
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index d4dcbd1f5e1..68b835e0572 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -11,12 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>;
 def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
 
 def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
 def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
 def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
 def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
+def MipsVSplat  : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>;
+def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>;
+
+def vsplati8  : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>;
+def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>;
+def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>;
+def vsplati64 : PatFrag<(ops node:$in), (v2i64 (MipsVSplatD (i32 node:$in)))>;
 
 // Immediates
 def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
@@ -1383,12 +1391,9 @@ class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w,
 class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d,
                                       MSA128D, MSA128W>;
 
-class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b,
-                                     MSA128B, GPR32>;
-class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h,
-                                     MSA128H, GPR32>;
-class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w,
-                                     MSA128W, GPR32>;
+class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", vsplati8,  MSA128B, GPR32>;
+class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", vsplati16, MSA128H, GPR32>;
+class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", vsplati32, MSA128W, GPR32>;
 
 class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>;
 class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>;
@@ -1573,10 +1578,10 @@ class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>;
 class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>;
 class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>;
 
-class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, MSA128B>;
-class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, MSA128H>;
-class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, MSA128W>;
-class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, MSA128D>;
+class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", vsplati8,  MSA128B>;
+class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", vsplati16, MSA128H>;
+class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", vsplati32, MSA128W>;
+class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", vsplati64, MSA128D>;
 
 class LDX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
                     ValueType TyNode, RegisterClass RCWD,
@@ -2356,6 +2361,7 @@ def LD_D: LD_D_ENC, LD_D_DESC;
 def LDI_B : LDI_B_ENC, LDI_B_DESC;
 def LDI_H : LDI_H_ENC, LDI_H_DESC;
 def LDI_W : LDI_W_ENC, LDI_W_DESC;
+def LDI_D : LDI_D_ENC, LDI_D_DESC;
 
 def LDX_B: LDX_B_ENC, LDX_B_DESC;
 def LDX_H: LDX_H_ENC, LDX_H_DESC;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 879df6d1299..3b446c5a13a 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -147,6 +147,7 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
   return new MipsSETargetLowering(TM);
 }
 
+// Enable MSA support for the given integer type and Register class.
 void MipsSETargetLowering::
 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
   addRegisterClass(Ty, RC);
@@ -158,6 +159,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
   setOperationAction(ISD::BITCAST, Ty, Legal);
   setOperationAction(ISD::LOAD, Ty, Legal);
   setOperationAction(ISD::STORE, Ty, Legal);
+  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
 
   setOperationAction(ISD::ADD, Ty, Legal);
   setOperationAction(ISD::CTLZ, Ty, Legal);
@@ -170,6 +172,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
   setOperationAction(ISD::UDIV, Ty, Legal);
 }
 
+// Enable MSA support for the given floating-point type and Register class.
 void MipsSETargetLowering::
 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
   addRegisterClass(Ty, RC);
@@ -224,6 +227,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
   case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
+  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
   }
 
   return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -921,6 +925,10 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_fdiv_w:
   case Intrinsic::mips_fdiv_d:
     return lowerMSABinaryIntr(Op, DAG, ISD::FDIV);
+  case Intrinsic::mips_fill_b:
+  case Intrinsic::mips_fill_h:
+  case Intrinsic::mips_fill_w:
+    return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT);
   case Intrinsic::mips_flog2_w:
   case Intrinsic::mips_flog2_d:
     return lowerMSAUnaryIntr(Op, DAG, ISD::FLOG2);
@@ -936,6 +944,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_fsub_w:
   case Intrinsic::mips_fsub_d:
     return lowerMSABinaryIntr(Op, DAG, ISD::FSUB);
+  case Intrinsic::mips_ldi_b:
+  case Intrinsic::mips_ldi_h:
+  case Intrinsic::mips_ldi_w:
+  case Intrinsic::mips_ldi_d:
+    return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT);
   case Intrinsic::mips_mulv_b:
   case Intrinsic::mips_mulv_h:
   case Intrinsic::mips_mulv_w:
@@ -1073,6 +1086,102 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
   }
 }
 
+/// \brief Return true if every operand of the build vector \p N compares equal
+/// to operand 0. The comparison is on SDValues rather than on constant values,
+/// so this relies on equivalent DAG nodes being reused and may return false
+/// even when isConstantSplat returns true.
+static bool isSplatVector(const BuildVectorSDNode *N) {
+  EVT VT = N->getValueType(0);
+  assert(VT.isVector() && "Expected a vector type");
+
+  unsigned int nOps = N->getNumOperands();
+  assert(nOps > 1 && "isSplat has 0 or 1 sized build vector");
+
+  SDValue Operand0 = N->getOperand(0); // Reference value for the comparison.
+
+  for (unsigned int i = 1; i < nOps; ++i) {
+    if (N->getOperand(i) != Operand0)
+      return false; // Found an operand that differs: not a splat.
+  }
+
+  return true;
+}
+
+// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
+// backend.
+//
+// Lowers according to the following rules:
+// - Vectors of 128-bits may be legal subject to the other rules. Other sizes
+//   are not legal.
+// - Non-constant splats are legal and are lowered to MipsISD::VSPLAT.
+// - Constant splats with a splat size of 8, 16 or 32 bits are legal and are
+//   lowered to MipsISD::VSPLAT with an i32 constant operand.
+// - 64-bit constant splats become MipsISD::VSPLAT (i64 constant) if HasMips64,
+//   else MipsISD::VSPLATD if the value fits in a signed 10-bit immediate.
+// - All other ISD::BUILD_VECTORs are not legal; an empty SDValue is returned.
+SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDLoc DL(Op);
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
+    return SDValue();
+
+  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                            HasAnyUndefs, 8, // MinSplatBits
+                            !Subtarget->isLittle())) { // isBigEndian
+    SDValue Result;
+    EVT TmpVecTy; // Vector type whose element width equals SplatBitSize.
+    EVT ConstTy = MVT::i32;
+    unsigned SplatOp = MipsISD::VSPLAT;
+
+    switch (SplatBitSize) {
+    default:
+      return SDValue(); // Splat sizes other than 8/16/32/64 are not handled.
+    case 64:
+      TmpVecTy = MVT::v2i64;
+
+      // i64 is an illegal type on Mips32, but if the constant fits into a
+      // signed 10-bit value then we can still handle it using VSPLATD and an
+      // i32 constant
+      if (HasMips64)
+        ConstTy = MVT::i64;
+      else if (isInt<10>(SplatValue.getSExtValue())) {
+        SplatValue = SplatValue.trunc(32); // Narrow to match the i32 ConstTy.
+        SplatOp = MipsISD::VSPLATD;
+      } else
+        return SDValue();
+      break;
+    case 32:
+      TmpVecTy = MVT::v4i32;
+      break;
+    case 16:
+      TmpVecTy = MVT::v8i16;
+      SplatValue = SplatValue.sext(32); // Widen to match the i32 ConstTy.
+      break;
+    case 8:
+      TmpVecTy = MVT::v16i8;
+      SplatValue = SplatValue.sext(32); // Widen to match the i32 ConstTy.
+      break;
+    }
+
+    Result = DAG.getNode(SplatOp, DL, TmpVecTy,
+                         DAG.getConstant(SplatValue, ConstTy));
+    if (ResTy != Result.getValueType()) // Splat type differs from result type.
+      Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result);
+
+    return Result;
+  }
+  else if (isSplatVector(Node)) // Not a constant splat, but operands all match.
+    return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0));
+
+  return SDValue();
+}
+
 MachineBasicBlock * MipsSETargetLowering::
 emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
   // $bb:
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 016d4adbf73..909ab7dfde1 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -22,7 +22,11 @@ namespace llvm {
   public:
     explicit MipsSETargetLowering(MipsTargetMachine &TM);
 
+    /// \brief Enable MSA support for the given integer type and Register
+    /// class.
     void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+    /// \brief Enable MSA support for the given floating-point type and
+    /// Register class.
     void addMSAFloatType(MVT::SimpleValueType Ty,
                          const TargetRegisterClass *RC);
 
@@ -69,6 +73,7 @@ namespace llvm {
     SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 
     MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
                                     MachineBasicBlock *BB) const;
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
new file mode 100644
index 00000000000..4cec6aa28aa
--- /dev/null
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -0,0 +1,120 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck -check-prefix=MIPS32 %s
+
+@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+@v2i64 = global <2 x i64> <i64 0, i64 0>
+@i64 = global i64 0
+
+define void @const_v16i8() nounwind {
+  ; MIPS32: const_v16i8:
+
+  store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
+  ; MIPS32: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
+  ; MIPS32: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2>, <16 x i8>*@v16i8
+  ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 258
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8
+  ; MIPS32-DAG: lui [[R2:\$[0-9]+]], 258
+  ; MIPS32-DAG: ori [[R2]], [[R2]], 772
+  ; MIPS32-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
+  ; MIPS32: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v16i8
+}
+
+define void @const_v8i16() nounwind {
+  ; MIPS32: const_v8i16:
+
+  store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16
+  ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
+  ; MIPS32: ld.h  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 4
+
+  store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16
+  ; MIPS32-DAG: lui [[R2:\$[0-9]+]], 1
+  ; MIPS32-DAG: ori [[R2]], [[R2]], 2
+  ; MIPS32-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+
+  store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
+  ; MIPS32: ld.h  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v8i16
+}
+
+define void @const_v4i32() nounwind {
+  ; MIPS32: const_v4i32:
+
+  store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32
+  ; MIPS32: ldi.w [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32
+  ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v4i32
+}
+
+define void @const_v2i64() nounwind {
+  ; MIPS32: const_v2i64:
+
+  store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64
+  ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64
+  ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64
+  ; MIPS32: ldi.w [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64
+  ; MIPS32: ldi.d [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v2i64
+}
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
new file mode 100644
index 00000000000..19213758773
--- /dev/null
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck -check-prefix=MIPS32 %s
+
+@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+@v2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define void @const_v4f32() nounwind {
+  ; MIPS32: const_v4f32:
+
+  store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v4f32
+}
+
+define void @const_v2f64() nounwind {
+  ; MIPS32: const_v2f64:
+
+  store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v2f64
+}