Add support in the disassembler for ignoring the L-bit on certain VEX instructions. Mark instructions that have this behavior. Fixes PR10676.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141065 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-12 23:37:33 +00:00 · 2011-10-04 06:30:42 +00:00 · 2011-10-04 06:30:42 +00:00 · 6744a17dcf
commit 6744a17dcf
parent f143b79b78
11 changed files with 184 additions and 77 deletions
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@ -767,8 +767,6 @@ static int getID(struct InternalInstruction* insn) {
        break;
      }
    
-      if (insn->mode == MODE_64BIT && wFromVEX3of3(insn->vexPrefix[2]))
-        attrMask |= ATTR_REXW;
      if (lFromVEX3of3(insn->vexPrefix[2]))
        attrMask |= ATTR_VEXL;
    }
@ -793,23 +791,55 @@ static int getID(struct InternalInstruction* insn) {
    }
  }
  else {
-    if (insn->rexPrefix & 0x08)
-      attrMask |= ATTR_REXW;
-  
    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
      attrMask |= ATTR_OPSIZE;
    else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XS;
    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XD;
-    
  }

+  if (insn->rexPrefix & 0x08)
+    attrMask |= ATTR_REXW;
+  
  if (getIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;
  
  /* The following clauses compensate for limitations of the tables. */
  
+  if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+    /*
+     * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
+     * has precedence since there are no L-bit with W-bit entries in the tables.
+     * So if the L-bit isn't significant we should use the W-bit instead.
+     */
+
+    const struct InstructionSpecifier *spec;
+    uint16_t instructionIDWithWBit;
+    const struct InstructionSpecifier *specWithWBit;
+
+    spec = specifierForUID(instructionID);
+
+    if (getIDWithAttrMask(&instructionIDWithWBit,
+                          insn,
+                          (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+      return 0;
+    }
+
+    specWithWBit = specifierForUID(instructionIDWithWBit);
+
+    if (instructionID != instructionIDWithWBit) {
+      insn->instructionID = instructionIDWithWBit;
+      insn->spec = specWithWBit;
+    } else {
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+    }
+    return 0;
+  }
+
  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
    /*
     * Although for SSE instructions it is usually necessary to treat REX.W+F2
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@ -107,7 +107,7 @@ enum attributeBits {
  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
-  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XS prefix")\
+  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")


--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@ -398,13 +398,17 @@ namespace X86II {
    /// field marked when using a f256 memory references.
    VEX_L       = 1U << 4,

+    /// VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
+    /// prefix. Usually used for scalar instructions. Needed by disassembler.
+    VEX_LIG     = 1U << 5,
+
    /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
    /// wacky 0x0F 0x0F prefix for 3DNow! instructions.  The manual documents
    /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
    /// storing a classifier in the imm8 field.  To simplify our implementation,
    /// we handle this by storeing the classifier in the opcode field and using
    /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
-    Has3DNow0F0FOpcode = 1U << 5
+    Has3DNow0F0FOpcode = 1U << 6
  };

  // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@ -113,6 +113,7 @@ class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L  { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
 class Has3DNow0F0FOpcode  { bit has3DNow0F0FOpcode = 1; }

 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
@ -150,6 +151,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
  bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
                            // to be encoded in a immediate field?
  bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
+  bit ignoresVEX_L = 0;     // Does this instruction ignore the L-bit?
  bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?

  // TSFlags layout should be kept in sync with X86InstrInfo.h.
@ -169,7 +171,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
  let TSFlags{35}    = hasVEX_4VPrefix;
  let TSFlags{36}    = hasVEX_i8ImmReg;
  let TSFlags{37}    = hasVEX_L;
-  let TSFlags{38}    = has3DNow0F0FOpcode;
+  let TSFlags{38}    = ignoresVEX_L;
+  let TSFlags{39}    = has3DNow0F0FOpcode;
 }

 class PseudoI<dag oops, dag iops, list<dag> pattern>
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -342,34 +342,38 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,

 // AVX
 def VMOVSSrr : sse12_move_rr<FR32, v4f32,
-                "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+                "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
+                VEX_LIG;
 def VMOVSDrr : sse12_move_rr<FR64, v2f64,
-                "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+                "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
+                VEX_LIG;

 // For the disassembler
 let isCodeGenOnly = 1 in {
  def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                        XS, VEX_4V;
+                        XS, VEX_4V, VEX_LIG;
  def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                        XD, VEX_4V;
+                        XD, VEX_4V, VEX_LIG;
 }

 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-  def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+  def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
+                 VEX_LIG;
  let AddedComplexity = 20 in
-    def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+    def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
+                   VEX_LIG;
 }

 def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
-                  [(store FR32:$src, addr:$dst)]>, XS, VEX;
+                  [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
 def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>, XD, VEX;
+                  [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;

 // SSE1 & 2
 let Constraints = "$src1 = $dst" in {
@ -1344,30 +1348,32 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
 }

 defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                                "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+                                "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+                                VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
-                                VEX_W;
+                                VEX_W, VEX_LIG;
 defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                                "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+                                "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
+                                VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
-                                VEX, VEX_W;
+                                VEX, VEX_W, VEX_LIG;

 // The assembler can recognize rr 64-bit instructions by seeing a rxx
 // register, but the same isn't true when only using memory operands,
 // provide other assembly "l" and "q" forms to address this explicitly
 // where appropriate to do so.
 defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
-                                  VEX_4V;
+                                  VEX_4V, VEX_LIG;
 defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
-                                  VEX_4V, VEX_W;
+                                  VEX_4V, VEX_W, VEX_LIG;
 defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
-                                  VEX_4V;
+                                  VEX_4V, VEX_LIG;
 defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
-                                  VEX_4V;
+                                  VEX_4V, VEX_LIG;
 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
-                                  VEX_4V, VEX_W;
+                                  VEX_4V, VEX_W, VEX_LIG;

 let Predicates = [HasAVX] in {
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
@ -1447,9 +1453,10 @@ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
 // intructions that only match with the intrinsic form, why create duplicates
 // to let them be recognized by the assembler?
 defm VCVTSD2SI     : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
-                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
 defm VCVTSD2SI64   : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
-                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
+                      VEX_LIG;

 defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                f128mem, load, "cvtsd2si{l}">, XD;
@ -1509,10 +1516,11 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,

 let Pattern = []<dag> in {
 defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
-                               "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+                               "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
+                               VEX, VEX_LIG;
 defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
                               "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
-                               VEX_W;
+                               VEX_W, VEX_LIG;
 defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
                               "cvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle>, TB, VEX;
@ -1559,12 +1567,12 @@ let Predicates = [HasAVX] in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                       (ins FR64:$src1, FR64:$src2),
                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      VEX_4V;
+                      VEX_4V, VEX_LIG;
 let mayLoad = 1 in
 def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                       (ins FR64:$src1, f64mem:$src2),
                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+                      []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;

 def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[HasAVX]>;
@ -1589,12 +1597,12 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, XS, Requires<[HasAVX]>, VEX_4V;
+                    []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
 let mayLoad = 1 in
 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+                    []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;

 let Predicates = [HasAVX] in {
  def : Pat<(f64 (fextend FR32:$src)),
@ -1986,11 +1994,11 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
 defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-                 XS, VEX_4V;
+                 XS, VEX_4V, VEX_LIG;
 defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-                 XD, VEX_4V;
+                 XD, VEX_4V, VEX_LIG;

 let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
@ -2045,14 +2053,17 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,

 let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
-                                  "ucomiss", SSEPackedSingle>, TB, VEX;
+                                  "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
-                                  "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
+                                  "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
+                                  VEX_LIG;
  let Pattern = []<dag> in {
    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
-                                    "comiss", SSEPackedSingle>, TB, VEX;
+                                    "comiss", SSEPackedSingle>, TB, VEX,
+                                    VEX_LIG;
    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
-                                    "comisd", SSEPackedDouble>, TB, OpSize, VEX;
+                                    "comisd", SSEPackedDouble>, TB, OpSize, VEX,
+                                    VEX_LIG;
  }

  defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
@ -2744,32 +2755,32 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {

 // Binary Arithmetic instructions
 defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
-            basic_sse12_fp_binop_s_int<0x58, "add", 0>,
-            basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+            basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
            basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
 defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
-            basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
-            basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+            basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
            basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;

 let isCommutable = 0 in {
  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
-              basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
-              basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+              basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
+  defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
-              basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
-              basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+              basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
+  defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
-              basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
-              basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+              basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
+  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
              basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
              basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
-              basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
-              basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+              basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
+  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
              basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
              basic_sse12_fp_binop_p_y_int<0x5D, "min">,
              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
@ -2967,7 +2978,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
 let Predicates = [HasAVX] in {
  // Square root.
  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
-                sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V;
+                sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;

  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
                sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
@ -2981,13 +2992,13 @@ let Predicates = [HasAVX] in {

  // Reciprocal approximations. Note that these typically require refinement
  // in order to obtain suitable precision.
-  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V;
+  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;

-  defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V;
+  defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
@ -5612,14 +5623,14 @@ let Predicates = [HasAVX] in {
                                  int_x86_avx_round_pd_256>, VEX;
  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                  int_x86_sse41_round_ss,
-                                  int_x86_sse41_round_sd, 0>, VEX_4V;
+                                  int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;

  // Instructions for the assembler
  defm VROUND  : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
                                        VEX;
  defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
                                        VEX;
-  defm VROUND  : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
+  defm VROUND  : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
 }

 defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@ -395,3 +395,30 @@

 # CHECK: rdrandq %rax
 0x48 0x0f 0xc7 0xf0
+
+# CHECK: vroundss $0, %xmm0, %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x0a 0xc0 0x00
+
+# CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x0b 0xc0 0x00
+
+# CHECK: vcvtsd2si %xmm0, %eax
+0xc4 0xe1 0x7f 0x2d 0xc0
+
+# CHECK: vcvtsd2si %xmm0, %rax
+0xc4 0xe1 0xff 0x2d 0xc0
+
+# CHECK: vucomisd %xmm1, %xmm0
+0xc5 0xfd 0x2e 0xc1
+
+# CHECK: vucomiss %xmm1, %xmm0
+0xc5 0xfc 0x2e 0xc1
+
+# CHECK: vcomisd %xmm1, %xmm0
+0xc5 0xfd 0x2f 0xc1
+
+# CHECK: vcomiss %xmm1, %xmm0
+0xc5 0xfc 0x2f 0xc1
+
+# CHECK: vaddss %xmm1, %xmm0, %xmm0
+0xc5 0xfe 0x58 0xc1
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@ -405,3 +405,30 @@

 # CHECK: rdrandl %eax
 0x0f 0xc7 0xf0
+
+# CHECK: vroundss $0, %xmm0, %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x0a 0xc0 0x00
+
+# CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x0b 0xc0 0x00
+
+# CHECK: vcvtsd2si %xmm0, %eax
+0xc4 0xe1 0x7f 0x2d 0xc0
+
+# CHECK: vcvtsd2si %xmm0, %eax
+0xc4 0xe1 0xff 0x2d 0xc0
+
+# CHECK: vucomisd %xmm1, %xmm0
+0xc5 0xfd 0x2e 0xc1
+
+# CHECK: vucomiss %xmm1, %xmm0
+0xc5 0xfc 0x2e 0xc1
+
+# CHECK: vcomisd %xmm1, %xmm0
+0xc5 0xfd 0x2f 0xc1
+
+# CHECK: vcomiss %xmm1, %xmm0
+0xc5 0xfc 0x2f 0xc1
+
+# CHECK: vaddss %xmm1, %xmm0, %xmm0
+0xc5 0xfe 0x58 0xc1
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@ -32,7 +32,8 @@ using namespace X86Disassembler;
 /// @param parent - The class that may be the superset
 /// @return       - True if child is a subset of parent, false otherwise.
 static inline bool inheritsFrom(InstructionContext child,
-                                InstructionContext parent) {
+                                InstructionContext parent,
+                                bool VEX_LIG = false) {
  if (child == parent)
    return true;
  
@ -68,33 +69,29 @@ static inline bool inheritsFrom(InstructionContext child,
  case IC_64BIT_XD_OPSIZE:
    return false;
  case IC_64BIT_REXW_XD:
-    return false;
  case IC_64BIT_REXW_XS:
-    return false;
  case IC_64BIT_REXW_OPSIZE:
    return false;
  case IC_VEX:
-    return inheritsFrom(child, IC_VEX_W);
+    return inheritsFrom(child, IC_VEX_W) ||
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L));
  case IC_VEX_XS:
-    return inheritsFrom(child, IC_VEX_W_XS);
+    return inheritsFrom(child, IC_VEX_W_XS) ||
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L_XS));
  case IC_VEX_XD:
-    return inheritsFrom(child, IC_VEX_W_XD);
+    return inheritsFrom(child, IC_VEX_W_XD) ||
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L_XD));
  case IC_VEX_OPSIZE:
-    return inheritsFrom(child, IC_VEX_W_OPSIZE);
+    return inheritsFrom(child, IC_VEX_W_OPSIZE) ||
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
  case IC_VEX_W:
-    return false;
  case IC_VEX_W_XS:
-    return false;
  case IC_VEX_W_XD:
-    return false;
  case IC_VEX_W_OPSIZE:
    return false;
  case IC_VEX_L:
-    return false;
  case IC_VEX_L_XS:
-    return false;
  case IC_VEX_L_XD:
-    return false;
  case IC_VEX_L_OPSIZE:
    return false;
  default:
@ -651,7 +648,8 @@ void DisassemblerTables::setTableFields(OpcodeType          type,
                                        uint8_t             opcode,
                                        const ModRMFilter   &filter,
                                        InstrUID            uid,
-                                        bool                is32bit) {
+                                        bool                is32bit,
+                                        bool                ignoresVEX_L) {
  unsigned index;
  
  ContextDecision &decision = *Tables[type];
@ -661,7 +659,7 @@ void DisassemblerTables::setTableFields(OpcodeType          type,
      continue;

    if (inheritsFrom((InstructionContext)index, 
-                     InstructionSpecifiers[uid].insnContext))
+                     InstructionSpecifiers[uid].insnContext, ignoresVEX_L))
      setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], 
                     filter,
                     uid,
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@ -261,12 +261,14 @@ public:
  ///                       correspond to the desired instruction.
  /// @param uid          - The unique ID of the instruction.
  /// @param is32bit      - Instructon is only 32-bit
+  /// @param ignoresVEX_L - Instruction ignores VEX.L
  void setTableFields(OpcodeType type,
                      InstructionContext insnContext,
                      uint8_t opcode,
                      const ModRMFilter &filter,
                      InstrUID uid,
-                      bool is32bit);  
+                      bool is32bit,
+                      bool ignoresVEX_L);  
  
  /// specForUID - Returns the instruction specifier for a given unique
  ///   instruction ID.  Used when resolving collisions.
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@ -217,6 +217,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
  HasVEXPrefix     = Rec->getValueAsBit("hasVEXPrefix");
  HasVEX_4VPrefix  = Rec->getValueAsBit("hasVEX_4VPrefix");
  HasVEX_WPrefix   = Rec->getValueAsBit("hasVEX_WPrefix");
+  IgnoresVEX_L     = Rec->getValueAsBit("ignoresVEX_L");
  HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix");
  IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
  
@ -284,7 +285,9 @@ InstructionContext RecognizableInstr::insnContext() const {
  InstructionContext insnContext;

  if (HasVEX_4VPrefix || HasVEXPrefix) {
-    if (HasOpSizePrefix && HasVEX_LPrefix)
+    if (HasVEX_LPrefix && HasVEX_WPrefix)
+      llvm_unreachable("Don't support VEX.L and VEX.W together");
+    else if (HasOpSizePrefix && HasVEX_LPrefix)
      insnContext = IC_VEX_L_OPSIZE;
    else if (HasOpSizePrefix && HasVEX_WPrefix)
      insnContext = IC_VEX_W_OPSIZE;
@ -957,7 +960,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
                              insnContext(), 
                              currentOpcode, 
                              *filter, 
-                              UID, Is32Bit);
+                              UID, Is32Bit, IgnoresVEX_L);
    
      Spec->modifierType = MODIFIER_OPCODE;
      Spec->modifierBase = opcodeToSet;
@ -967,14 +970,14 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
                            insnContext(), 
                            opcodeToSet, 
                            *filter, 
-                            UID, Is32Bit);
+                            UID, Is32Bit, IgnoresVEX_L);
    }
  } else {
    tables.setTableFields(opcodeType,
                          insnContext(),
                          opcodeToSet,
                          *filter,
-                          UID, Is32Bit);
+                          UID, Is32Bit, IgnoresVEX_L);
    
    Spec->modifierType = MODIFIER_NONE;
    Spec->modifierBase = opcodeToSet;
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@ -60,6 +60,8 @@ private:
  bool HasVEX_WPrefix;
  /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
  bool HasVEX_LPrefix;
+  /// The ignoresVEX_L field from the record
+  bool IgnoresVEX_L;
  /// The hasLockPrefix field from the record
  bool HasLockPrefix;
  /// The isCodeGenOnly filed from the record