Split instruction info into multiple files, one for each of x87, MMX, and SSE.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26300 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-08 03:30:22 +00:00 · 2006-02-21 19:13:53 +00:00 · 2006-02-21 19:13:53 +00:00 · ffcb95beab
commit ffcb95beab
parent a1532bc283
4 changed files with 644 additions and 590 deletions
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@ -0,0 +1,482 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set -------*- C++ -*-=//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+// All FP Stack operations are represented with two instructions here.  The
+// first instruction, generated by the instruction selector, uses "RFP"
+// registers: a traditional register file to reference floating point values.
+// These instructions are all pseudo instructions and use the "Fp" prefix.
+// The second instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler.  The FP stackifier pass converts one to the other
+// after register allocation occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
+
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag ops, string asm> : I<o, F, ops, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag ops, FPFormat fp, list<dag> pattern>
+  : X86Inst<0, Pseudo, NoImm, ops, ""> {
+  let FPForm = fp; let FPFormBits = FPForm.Value;
+  let Pattern = pattern;
+}
+
+// Random Pseudo Instructions.
+def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP,
+                  [(set RFP:$dst, X86fpget)]>;                    // FPR = ST(0)
+
+let noResults = 1 in 
+  def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP,
+                        [(X86fpset RFP:$src)]>, Imp<[], [ST0]>;   // ST(0) = FPR
+
+// FpI - Floating Point Pseudo Instruction template. Predicated on FPStack.
+class FpI<dag ops, FPFormat fp, list<dag> pattern> :
+  FpI_<ops, fp, pattern>, Requires<[FPStack]>;
+
+
+def FpMOV       : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2
+
+// Arithmetic
+// Add, Sub, Mul, Div.
+def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
+                [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>;
+def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
+                [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>;
+def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
+                [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>;
+def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
+                [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>;
+
+class FPST0rInst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (ops RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;
+
+// Binary Ops with a memory source.
+def FpADD32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1,
+                                     (extloadf64f32 addr:$src2)))]>;
+                // ST(0) = ST(0) + [mem32]
+def FpADD64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>;
+                // ST(0) = ST(0) + [mem64]
+def FpMUL32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fmul RFP:$src1,
+                                     (extloadf64f32 addr:$src2)))]>;
+                // ST(0) = ST(0) * [mem32]
+def FpMUL64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>;
+                // ST(0) = ST(0) * [mem64]
+def FpSUB32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub RFP:$src1,
+                                    (extloadf64f32 addr:$src2)))]>;
+                // ST(0) = ST(0) - [mem32]
+def FpSUB64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>;
+                // ST(0) = ST(0) - [mem64]
+def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2),
+                                     RFP:$src1))]>;
+                // ST(0) = [mem32] - ST(0)
+def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>;
+                // ST(0) = [mem64] - ST(0)
+def FpDIV32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv RFP:$src1,
+                                    (extloadf64f32 addr:$src2)))]>;
+                // ST(0) = ST(0) / [mem32]
+def FpDIV64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>;
+                // ST(0) = ST(0) / [mem64]
+def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2),
+                                     RFP:$src1))]>;
+                // ST(0) = [mem32] / ST(0)
+def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>;
+                // ST(0) = [mem64] / ST(0)
+
+
+def FADD32m  : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">;
+def FADD64m  : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">;
+def FMUL32m  : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">;
+def FMUL64m  : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">;
+def FSUB32m  : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">;
+def FSUB64m  : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">;
+def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">;
+def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">;
+def FDIV32m  : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">;
+def FDIV64m  : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">;
+def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
+def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
+
+def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1,
+                                     (X86fild addr:$src2, i16)))]>;
+                // ST(0) = ST(0) + [mem16int]
+def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1,
+                                     (X86fild addr:$src2, i32)))]>;
+                // ST(0) = ST(0) + [mem32int]
+def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fmul RFP:$src1,
+                                     (X86fild addr:$src2, i16)))]>;
+                // ST(0) = ST(0) * [mem16int]
+def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fmul RFP:$src1,
+                                     (X86fild addr:$src2, i32)))]>;
+                // ST(0) = ST(0) * [mem32int]
+def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub RFP:$src1,
+                                     (X86fild addr:$src2, i16)))]>;
+                // ST(0) = ST(0) - [mem16int]
+def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fsub RFP:$src1,
+                                     (X86fild addr:$src2, i32)))]>;
+                // ST(0) = ST(0) - [mem32int]
+def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
+                                      RFP:$src1))]>;
+                // ST(0) = [mem16int] - ST(0)
+def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
+                                      RFP:$src1))]>;
+                // ST(0) = [mem32int] - ST(0)
+def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv RFP:$src1,
+                                     (X86fild addr:$src2, i16)))]>;
+                // ST(0) = ST(0) / [mem16int]
+def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fdiv RFP:$src1,
+                                     (X86fild addr:$src2, i32)))]>;
+                // ST(0) = ST(0) / [mem32int]
+def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
+                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
+                                      RFP:$src1))]>;
+                // ST(0) = [mem16int] / ST(0)
+def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
+                                      RFP:$src1))]>;
+                // ST(0) = [mem32int] / ST(0)
+
+def FIADD16m  : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;
+def FIADD32m  : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">;
+def FIMUL16m  : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">;
+def FIMUL32m  : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">;
+def FISUB16m  : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">;
+def FISUB32m  : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">;
+def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">;
+def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">;
+def FIDIV16m  : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">;
+def FIDIV32m  : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">;
+def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">;
+def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
+// we have to put some 'r's in and take them out of weird places.
+def FADDST0r   : FPST0rInst <0xC0, "fadd $op">;
+def FADDrST0   : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
+def FADDPrST0  : FPrST0PInst<0xC0, "faddp $op">;
+def FSUBRST0r  : FPST0rInst <0xE8, "fsubr $op">;
+def FSUBrST0   : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
+def FSUBPrST0  : FPrST0PInst<0xE8, "fsub{r}p $op">;
+def FSUBST0r   : FPST0rInst <0xE0, "fsub $op">;
+def FSUBRrST0  : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
+def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
+def FMULST0r   : FPST0rInst <0xC8, "fmul $op">;
+def FMULrST0   : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
+def FMULPrST0  : FPrST0PInst<0xC8, "fmulp $op">;
+def FDIVRST0r  : FPST0rInst <0xF8, "fdivr $op">;
+def FDIVrST0   : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
+def FDIVPrST0  : FPrST0PInst<0xF8, "fdiv{r}p $op">;
+def FDIVST0r   : FPST0rInst <0xF0, "fdiv $op">;
+def FDIVRrST0  : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
+def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
+
+
+// Unary operations.
+def FpCHS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
+                 [(set RFP:$dst, (fneg RFP:$src))]>;
+def FpABS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
+                 [(set RFP:$dst, (fabs RFP:$src))]>;
+def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
+                 [(set RFP:$dst, (fsqrt RFP:$src))]>;
+def FpSIN  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
+                 [(set RFP:$dst, (fsin RFP:$src))]>;
+def FpCOS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
+                 [(set RFP:$dst, (fcos RFP:$src))]>;
+def FpTST  : FpI<(ops RFP:$src), OneArgFP,
+                 []>;
+
+def FCHS  : FPI<0xE0, RawFrm, (ops), "fchs">, D9;
+def FABS  : FPI<0xE1, RawFrm, (ops), "fabs">, D9;
+def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9;
+def FSIN  : FPI<0xFE, RawFrm, (ops), "fsin">, D9;
+def FCOS  : FPI<0xFF, RawFrm, (ops), "fcos">, D9;
+def FTST  : FPI<0xE4, RawFrm, (ops), "ftst">, D9;
+
+
+// Floating point cmovs.
+let isTwoAddress = 1 in {
+  def FpCMOVB  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_B))]>;
+  def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_BE))]>;
+  def FpCMOVE  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_E))]>;
+  def FpCMOVP  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_P))]>;
+  def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_AE))]>;
+  def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_A))]>;
+  def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_NE))]>;
+  def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
+                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+                                      X86_COND_NP))]>;
+}
+
+def FCMOVB  : FPI<0xC0, AddRegFrm, (ops RST:$op),
+                  "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
+def FCMOVBE : FPI<0xD0, AddRegFrm, (ops RST:$op),
+                  "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
+def FCMOVE  : FPI<0xC8, AddRegFrm, (ops RST:$op),
+                  "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
+def FCMOVP  : FPI<0xD8, AddRegFrm, (ops RST:$op),
+                  "fcmovu  {$op, %st(0)|%ST(0), $op}">, DA;
+def FCMOVNB : FPI<0xC0, AddRegFrm, (ops RST:$op),
+                  "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
+def FCMOVNBE  : FPI<0xD0, AddRegFrm, (ops RST:$op),
+                  "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
+def FCMOVNE : FPI<0xC8, AddRegFrm, (ops RST:$op),
+                  "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
+def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op),
+                  "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
+
+// Floating point loads & stores.
+def FpLD32m  : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
+                   [(set RFP:$dst, (extloadf64f32 addr:$src))]>;
+def FpLD64m  : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
+                   [(set RFP:$dst, (loadf64 addr:$src))]>;
+def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
+                   [(set RFP:$dst, (X86fild addr:$src, i16))]>;
+def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
+                   [(set RFP:$dst, (X86fild addr:$src, i32))]>;
+def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
+                   [(set RFP:$dst, (X86fild addr:$src, i64))]>;
+
+def FpST32m   : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
+                [(truncstore RFP:$src, addr:$op, f32)]>;
+def FpST64m   : FpI<(ops f64mem:$op, RFP:$src), OneArgFP,
+                [(store RFP:$src, addr:$op)]>;
+
+def FpSTP32m  : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>;
+def FpSTP64m  : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>;
+def FpIST16m  : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>;
+def FpIST32m  : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>;
+def FpIST64m  : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>;
+
+def FLD32m   : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
+def FLD64m   : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
+def FILD16m  : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">;
+def FILD32m  : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">;
+def FILD64m  : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">;
+def FST32m   : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">;
+def FST64m   : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">;
+def FSTP32m  : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">;
+def FSTP64m  : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">;
+def FIST16m  : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">;
+def FIST32m  : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">;
+def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">;
+def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
+def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+def FpISTT16m  : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP,
+                [(X86fp_to_i16mem RFP:$src, addr:$op)]>,
+                Requires<[HasSSE3]>;
+def FpISTT32m  : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP,
+                [(X86fp_to_i32mem RFP:$src, addr:$op)]>,
+                Requires<[HasSSE3]>;
+def FpISTT64m  : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP,
+                [(X86fp_to_i64mem RFP:$src, addr:$op)]>,
+                Requires<[HasSSE3]>;
+
+def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
+def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">;
+def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">;
+
+// FP Stack manipulation instructions.
+def FLDrr   : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9;
+def FSTrr   : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD;
+def FSTPrr  : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD;
+def FXCH    : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;
+
+// Floating point constant loads.
+def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP,
+                [(set RFP:$dst, fp64imm0)]>;
+def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP,
+                [(set RFP:$dst, fp64imm1)]>;
+
+def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
+def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;
+
+
+// Floating point compares.
+def FpUCOMr   : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
+                    []>;  // FPSW = cmp ST(0) with ST(i)
+def FpUCOMIr  : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
+                    [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i)
+
+def FUCOMr    : FPI<0xE0, AddRegFrm,    // FPSW = cmp ST(0) with ST(i)
+                    (ops RST:$reg),
+                    "fucom $reg">, DD, Imp<[ST0],[]>;
+def FUCOMPr   : FPI<0xE8, AddRegFrm,    // FPSW = cmp ST(0) with ST(i), pop
+                  (ops RST:$reg),
+                  "fucomp $reg">, DD, Imp<[ST0],[]>;
+def FUCOMPPr  : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
+                  (ops),
+                  "fucompp">, DA, Imp<[ST0],[]>;
+
+def FUCOMIr  : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i)
+                   (ops RST:$reg),
+                   "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
+def FUCOMIPr : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i), pop
+                 (ops RST:$reg),
+                 "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
+
+
+// Floating point flag ops.
+def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
+                  (ops), "fnstsw", []>, DF, Imp<[],[AX]>;
+
+def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control word
+                  (ops i16mem:$dst), "fnstcw $dst", []>;
+def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
+                  (ops i16mem:$dst), "fldcw $dst", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
+                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+               Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
+                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
+// Upper bits are disregarded. NOTE(review): operands are V4F32/V2F64; confirm.
+def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                   "movaps {$src, $dst|$dst, $src}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                   "movapd {$src, $dst|$dst, $src}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
+// Upper bits are disregarded.
+def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+                   "movaps {$src, $dst|$dst, $src}",
+                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
+                 Requires<[HasSSE1]>, TB;
+def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+                  "movapd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  "andps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
+                Requires<[HasSSE1]>, TB;
+def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  "andpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                 "orps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                 "orpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  "xorps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
+                Requires<[HasSSE1]>, TB;
+def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  "xorpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+}
+def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                  "andps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fand FR32:$src1,
+                                    (X86loadpf32 addr:$src2)))]>,
+                Requires<[HasSSE1]>, TB;
+def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                  "andpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fand FR64:$src1,
+                                    (X86loadpf64 addr:$src2)))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                 "orps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                 "orpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                  "xorps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fxor FR32:$src1,
+                                    (X86loadpf32 addr:$src2)))]>,
+                Requires<[HasSSE1]>, TB;
+def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                  "xorpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fxor FR64:$src1,
+                                    (X86loadpf64 addr:$src2)))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+
+def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+}
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@ -191,6 +191,7 @@ def MRMInitReg : Format<32>;

 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
+def HasMMX  : Predicate<"Subtarget->hasMMX()">;
 def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
 def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
 def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
@ -2571,510 +2572,19 @@ def CMPSDrm : I<0xC2, MRMSrcMem,
 // Floating Point Stack Support
 //===----------------------------------------------------------------------===//

-// Floating point support.  All FP Stack operations are represented with two 
-// instructions here.  The first instruction, generated by the instruction
-// selector, uses "RFP" registers: a traditional register file to reference
-// floating point values.  These instructions are all psuedo instructions and
-// use the "Fp" prefix.  The second instruction is defined with FPI, which is
-// the actual instruction emitted by the assembler.  The FP stackifier pass
-// converts one to the other after register allocation occurs.
-//
-// Note that the FpI instruction should have instruction selection info (e.g.
-// a pattern) and the FPI instruction should have emission info (e.g. opcode
-// encoding and asm printing info).
-
-// FPI - Floating Point Instruction template.
-class FPI<bits<8> o, Format F, dag ops, string asm> : I<o, F, ops, asm, []> {}
-
-// FpI_ - Floating Point Psuedo Instruction template. Not Predicated.
-class FpI_<dag ops, FPFormat fp, list<dag> pattern>
-  : X86Inst<0, Pseudo, NoImm, ops, ""> {
-  let FPForm = fp; let FPFormBits = FPForm.Value;
-  let Pattern = pattern;
-}
-
-// Random Pseudo Instructions.
-def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP,
-                  [(set RFP:$dst, X86fpget)]>;                    // FPR = ST(0)
-
-let noResults = 1 in 
-  def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP,
-                        [(X86fpset RFP:$src)]>, Imp<[], [ST0]>;   // ST(0) = FPR
-
-// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack.
-class FpI<dag ops, FPFormat fp, list<dag> pattern> :
-  FpI_<ops, fp, pattern>, Requires<[FPStack]>;
-
-
-def FpMOV       : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2
-
-// Arithmetic
-// Add, Sub, Mul, Div.
-def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
-                [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>;
-def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
-                [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>;
-def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
-                [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>;
-def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
-                [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>;
-
-class FPST0rInst<bits<8> o, string asm>
-  : FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;
-class FPrST0Inst<bits<8> o, string asm>
-  : FPI<o, AddRegFrm, (ops RST:$op), asm>, DC;
-class FPrST0PInst<bits<8> o, string asm>
-  : FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;
-
-// Binary Ops with a memory source.
-def FpADD32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fadd RFP:$src1,
-                                     (extloadf64f32 addr:$src2)))]>;
-                // ST(0) = ST(0) + [mem32]
-def FpADD64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>;
-                // ST(0) = ST(0) + [mem64]
-def FpMUL32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fmul RFP:$src1,
-                                     (extloadf64f32 addr:$src2)))]>;
-                // ST(0) = ST(0) * [mem32]
-def FpMUL64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>;
-                // ST(0) = ST(0) * [mem64]
-def FpSUB32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub RFP:$src1,
-                                    (extloadf64f32 addr:$src2)))]>;
-                // ST(0) = ST(0) - [mem32]
-def FpSUB64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>;
-                // ST(0) = ST(0) - [mem64]
-def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2),
-                                     RFP:$src1))]>;
-                // ST(0) = [mem32] - ST(0)
-def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>;
-                // ST(0) = [mem64] - ST(0)
-def FpDIV32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv RFP:$src1,
-                                    (extloadf64f32 addr:$src2)))]>;
-                // ST(0) = ST(0) / [mem32]
-def FpDIV64m  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>;
-                // ST(0) = ST(0) / [mem64]
-def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2),
-                                     RFP:$src1))]>;
-                // ST(0) = [mem32] / ST(0)
-def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>;
-                // ST(0) = [mem64] / ST(0)
-
-
-def FADD32m  : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">;
-def FADD64m  : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">;
-def FMUL32m  : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">;
-def FMUL64m  : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">;
-def FSUB32m  : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">;
-def FSUB64m  : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">;
-def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">;
-def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">;
-def FDIV32m  : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">;
-def FDIV64m  : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">;
-def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
-def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
-
-def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fadd RFP:$src1,
-                                     (X86fild addr:$src2, i16)))]>;
-                // ST(0) = ST(0) + [mem16int]
-def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fadd RFP:$src1,
-                                     (X86fild addr:$src2, i32)))]>;
-                // ST(0) = ST(0) + [mem32int]
-def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fmul RFP:$src1,
-                                     (X86fild addr:$src2, i16)))]>;
-                // ST(0) = ST(0) * [mem16int]
-def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fmul RFP:$src1,
-                                     (X86fild addr:$src2, i32)))]>;
-                // ST(0) = ST(0) * [mem32int]
-def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub RFP:$src1,
-                                     (X86fild addr:$src2, i16)))]>;
-                // ST(0) = ST(0) - [mem16int]
-def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fsub RFP:$src1,
-                                     (X86fild addr:$src2, i32)))]>;
-                // ST(0) = ST(0) - [mem32int]
-def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
-                                      RFP:$src1))]>;
-                // ST(0) = [mem16int] - ST(0)
-def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
-                                      RFP:$src1))]>;
-                // ST(0) = [mem32int] - ST(0)
-def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv RFP:$src1,
-                                     (X86fild addr:$src2, i16)))]>;
-                // ST(0) = ST(0) / [mem16int]
-def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                    [(set RFP:$dst, (fdiv RFP:$src1,
-                                     (X86fild addr:$src2, i32)))]>;
-                // ST(0) = ST(0) / [mem32int]
-def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
-                                      RFP:$src1))]>;
-                // ST(0) = [mem16int] / ST(0)
-def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
-                                      RFP:$src1))]>;
-                // ST(0) = [mem32int] / ST(0)
-
-def FIADD16m  : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;
-def FIADD32m  : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">;
-def FIMUL16m  : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">;
-def FIMUL32m  : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">;
-def FISUB16m  : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">;
-def FISUB32m  : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">;
-def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">;
-def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">;
-def FIDIV16m  : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">;
-def FIDIV32m  : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">;
-def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">;
-def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">;
-
-// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
-// of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
-// we have to put some 'r's in and take them out of weird places.
-def FADDST0r   : FPST0rInst <0xC0, "fadd $op">;
-def FADDrST0   : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
-def FADDPrST0  : FPrST0PInst<0xC0, "faddp $op">;
-def FSUBRST0r  : FPST0rInst <0xE8, "fsubr $op">;
-def FSUBrST0   : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
-def FSUBPrST0  : FPrST0PInst<0xE8, "fsub{r}p $op">;
-def FSUBST0r   : FPST0rInst <0xE0, "fsub $op">;
-def FSUBRrST0  : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
-def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
-def FMULST0r   : FPST0rInst <0xC8, "fmul $op">;
-def FMULrST0   : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
-def FMULPrST0  : FPrST0PInst<0xC8, "fmulp $op">;
-def FDIVRST0r  : FPST0rInst <0xF8, "fdivr $op">;
-def FDIVrST0   : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
-def FDIVPrST0  : FPrST0PInst<0xF8, "fdiv{r}p $op">;
-def FDIVST0r   : FPST0rInst <0xF0, "fdiv $op">;
-def FDIVRrST0  : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
-def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
-
-
-// Unary operations.
-def FpCHS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
-                 [(set RFP:$dst, (fneg RFP:$src))]>;
-def FpABS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
-                 [(set RFP:$dst, (fabs RFP:$src))]>;
-def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
-                 [(set RFP:$dst, (fsqrt RFP:$src))]>;
-def FpSIN  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
-                 [(set RFP:$dst, (fsin RFP:$src))]>;
-def FpCOS  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
-                 [(set RFP:$dst, (fcos RFP:$src))]>;
-def FpTST  : FpI<(ops RFP:$src), OneArgFP,
-                 []>;
-
-def FCHS  : FPI<0xE0, RawFrm, (ops), "fchs">, D9;
-def FABS  : FPI<0xE1, RawFrm, (ops), "fabs">, D9;
-def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9;
-def FSIN  : FPI<0xFE, RawFrm, (ops), "fsin">, D9;
-def FCOS  : FPI<0xFF, RawFrm, (ops), "fcos">, D9;
-def FTST  : FPI<0xE4, RawFrm, (ops), "ftst">, D9;
-
-
-// Floating point cmovs.
-let isTwoAddress = 1 in {
-  def FpCMOVB  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_B))]>;
-  def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_BE))]>;
-  def FpCMOVE  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_E))]>;
-  def FpCMOVP  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_P))]>;
-  def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_AE))]>;
-  def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_A))]>;
-  def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_NE))]>;
-  def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
-                     [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
-                                      X86_COND_NP))]>;
-}
-
-def FCMOVB  : FPI<0xC0, AddRegFrm, (ops RST:$op),
-                  "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
-def FCMOVBE : FPI<0xD0, AddRegFrm, (ops RST:$op),
-                  "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
-def FCMOVE  : FPI<0xC8, AddRegFrm, (ops RST:$op),
-                  "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
-def FCMOVP  : FPI<0xD8, AddRegFrm, (ops RST:$op),
-                  "fcmovu  {$op, %st(0)|%ST(0), $op}">, DA;
-def FCMOVNB : FPI<0xC0, AddRegFrm, (ops RST:$op),
-                  "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
-def FCMOVNBE  : FPI<0xD0, AddRegFrm, (ops RST:$op),
-                  "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
-def FCMOVNE : FPI<0xC8, AddRegFrm, (ops RST:$op),
-                  "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
-def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op),
-                  "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
-
-// Floating point loads & stores.
-def FpLD32m  : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (extloadf64f32 addr:$src))]>;
-def FpLD64m  : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (loadf64 addr:$src))]>;
-def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (X86fild addr:$src, i16))]>;
-def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (X86fild addr:$src, i32))]>;
-def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (X86fild addr:$src, i64))]>;
-
-def FpST32m   : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
-                [(truncstore RFP:$src, addr:$op, f32)]>;
-def FpST64m   : FpI<(ops f64mem:$op, RFP:$src), OneArgFP,
-                [(store RFP:$src, addr:$op)]>;
-
-def FpSTP32m  : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>;
-def FpSTP64m  : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST16m  : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST32m  : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST64m  : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>;
-
-def FLD32m   : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
-def FLD64m   : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
-def FILD16m  : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">;
-def FILD32m  : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">;
-def FILD64m  : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">;
-def FST32m   : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">;
-def FST64m   : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">;
-def FSTP32m  : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">;
-def FSTP64m  : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">;
-def FIST16m  : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">;
-def FIST32m  : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">;
-def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">;
-def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
-def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
-
-// FISTTP requires SSE3 even though it's a FPStack op.
-def FpISTT16m  : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP,
-                [(X86fp_to_i16mem RFP:$src, addr:$op)]>,
-                Requires<[HasSSE3]>;
-def FpISTT32m  : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP,
-                [(X86fp_to_i32mem RFP:$src, addr:$op)]>,
-                Requires<[HasSSE3]>;
-def FpISTT64m  : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP,
-                [(X86fp_to_i64mem RFP:$src, addr:$op)]>,
-                Requires<[HasSSE3]>;
-
-def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
-def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">;
-def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">;
-
-// FP Stack manipulation instructions.
-def FLDrr   : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9;
-def FSTrr   : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD;
-def FSTPrr  : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD;
-def FXCH    : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;
-
-// Floating point constant loads.
-def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP,
-                [(set RFP:$dst, fp64imm0)]>;
-def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP,
-                [(set RFP:$dst, fp64imm1)]>;
-
-def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
-def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;
-
-
-// Floating point compares.
-def FpUCOMr   : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
-                    []>;  // FPSW = cmp ST(0) with ST(i)
-def FpUCOMIr  : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
-                    [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i)
-
-def FUCOMr    : FPI<0xE0, AddRegFrm,    // FPSW = cmp ST(0) with ST(i)
-                    (ops RST:$reg),
-                    "fucom $reg">, DD, Imp<[ST0],[]>;
-def FUCOMPr   : FPI<0xE8, AddRegFrm,    // FPSW = cmp ST(0) with ST(i), pop
-                  (ops RST:$reg),
-                  "fucomp $reg">, DD, Imp<[ST0],[]>;
-def FUCOMPPr  : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
-                  (ops),
-                  "fucompp">, DA, Imp<[ST0],[]>;
-
-def FUCOMIr  : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i)
-                   (ops RST:$reg),
-                   "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
-def FUCOMIPr : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i), pop
-                 (ops RST:$reg),
-                 "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
-
-
-// Floating point flag ops.
-def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
-                  (ops), "fnstsw", []>, DF, Imp<[],[AX]>;
-
-def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
-                  (ops i16mem:$dst), "fnstcw $dst", []>;
-def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
-                  (ops i16mem:$dst), "fldcw $dst", []>;
-
+include "X86InstrFPStack.td"

 //===----------------------------------------------------------------------===//
 // MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
 //===----------------------------------------------------------------------===//

-// Move Instructions
-def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src),
-                 "movd {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
-                 "movd {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src),
-                 "movd {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-
-def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-
-
-def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
-                 "movq {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
-                 "movq {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
-                 "movq {$src, $dst|$dst, $src}", []>, TB,
-               Requires<[HasSSE1]>;
-
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
+include "X86InstrMMX.td"

 //===----------------------------------------------------------------------===//
 // XMM Packed Floating point support (requires SSE / SSE2)
 //===----------------------------------------------------------------------===//

-def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}v2", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
-                "movapd {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB, OpSize;
-def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-// Logical
-let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "andps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "andpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "xorps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "xorpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-}
-def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "andps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fand V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "andpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fand V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "xorps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fxor V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "xorpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fxor V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-}
+include "X86InstrSSE.td"

 //===----------------------------------------------------------------------===//
 // Miscellaneous Instructions
@ -3100,102 +2610,6 @@ def MOV32r0  : I<0x31, MRMInitReg,  (ops R32:$dst),
                 "xor{l} $dst, $dst",
                 [(set R32:$dst, 0)]>;

-// Alias instructions that map fld0 to pxor for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
-               "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
-             Requires<[HasSSE1]>, TB, OpSize;
-def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
-               "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
-             Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
-// Upper bits are disregarded.
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
-                   "movaps {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
-                   "movapd {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
-// Upper bits are disregarded.
-def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
-                   "movaps {$src, $dst|$dst, $src}",
-                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
-                  "movapd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                  "andps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                  "andpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                  "xorps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                  "xorpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-}
-def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                  "andps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fand FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                  "andpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fand FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                  "xorps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fxor FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                  "xorpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fxor FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-
-def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-}
-
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@ -0,0 +1,57 @@
+//====- X86InstrMMX.td - Describe the X86 Instruction Set -------*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+// movd forms with a VR64 operand, gated on HasMMX: R32 -> VR64 (rr),
+// i32mem -> VR64 (rm) and VR64 -> i32mem (mr).  The forms that write VR64 use
+// opcode 0x6E; the store form uses 0x7E.  All three carry an empty pattern
+// list, so no selection pattern is attached to them here.
+def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src),
+                 "movd {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
+                 "movd {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src),
+                 "movd {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+
+// movd forms with a VR128 operand, gated on HasSSE2.  They reuse the opcodes
+// of the VR64 forms above (0x6E when writing the vector register, 0x7E for
+// the store) and differ in encoding by the additional OpSize modifier.  No
+// selection patterns are attached.
+def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+
+
+// movq forms operating on VR64, gated on HasMMX: register copy (rr) and load
+// from i64mem (rm) use opcode 0x6F; the store to i64mem (mr) uses 0x7F.  No
+// selection patterns are attached.
+def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
+                 "movq {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
+                 "movq {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
+                 "movq {$src, $dst|$dst, $src}", []>, TB,
+               Requires<[HasMMX]>;
+
+// movq forms with a VR128 operand, gated on HasSSE2.  The rr/rm forms use
+// opcode 0x7E with XS; the store form uses 0xD6 with TB and OpSize.  The
+// store writes its i64mem operand, so it is declared MRMDestMem like every
+// other mr def in this file.  No selection patterns are attached.
+// NOTE(review): MOVQ128rr declares a VR64 source; confirm that operand class
+// is what the 0x7E/XS encoding is meant to take.
+def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>;
+def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>;
+def MOVQ128mr : I<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -0,0 +1,101 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+// Register-to-register movaps / movapd (opcode 0x28).  The ps form operates
+// on V4F32 and requires SSE1; the pd form operates on V2F64, adds OpSize, and
+// requires SSE2.  Neither carries a selection pattern.
+def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                "movaps {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                "movapd {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// movaps / movapd memory forms on f128mem: rm loads use opcode 0x28 and mr
+// stores use 0x29.  The ps forms operate on V4F32 and require SSE1; the pd
+// forms operate on V2F64, add OpSize, and require SSE2, matching every other
+// pd definition in this file.  None carries a selection pattern.
+def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                "movaps {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
+                "movaps {$src, $dst|$dst, $src}",[]>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                "movapd {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
+                "movapd {$src, $dst|$dst, $src}",[]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Logical
+// Packed bitwise logical operations on V4F32 (ps, HasSSE1) and V2F64 (pd,
+// HasSSE2 with OpSize).  Every def in this scope is marked two-address; the
+// asm strings print only $src2 and $dst.
+let isTwoAddress = 1 in {
+// Register-register and/or/xor are additionally marked commutable.  The
+// and/xor forms select from X86fand / X86fxor; the or forms have an empty
+// pattern list.
+let isCommutable = 1 in {
+def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "andps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
+              Requires<[HasSSE1]>, TB;
+def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "andpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE1]>, TB;
+def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "xorps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
+              Requires<[HasSSE1]>, TB;
+def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "xorpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+}
+// Register-memory forms (f128mem second source) sit outside the isCommutable
+// scope.  The and/xor patterns fold the memory operand through
+// X86loadpv4f32 / X86loadpv2f64; the or forms have an empty pattern list.
+def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "andps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fand V4F32:$src1,
+                                  (X86loadpv4f32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, TB;
+def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "andpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fand V2F64:$src1,
+                                  (X86loadpv2f64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE1]>, TB;
+def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "xorps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fxor V4F32:$src1,
+                                  (X86loadpv4f32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, TB;
+def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "xorpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fxor V2F64:$src1,
+                                  (X86loadpv2f64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+
+// andn forms (opcode 0x55): two-address but not marked commutable, and none
+// carries a selection pattern.
+def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+}