diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 09324164917..960b2510a71 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -13,6 +13,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Pseudos that match X86fp_to_i{16,32,64}mem: RFP $src, integer memory $dst.
+let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
+  def FP_TO_INT16_IN_MEM : I<0, Pseudo,
+                            (ops i16mem:$dst, RFP:$src),
+                           "#FP_TO_INT16_IN_MEM PSEUDO!",
+                           [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
+  def FP_TO_INT32_IN_MEM : I<0, Pseudo,
+                            (ops i32mem:$dst, RFP:$src),
+                           "#FP_TO_INT32_IN_MEM PSEUDO!",
+                           [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
+  def FP_TO_INT64_IN_MEM : I<0, Pseudo,
+                            (ops i64mem:$dst, RFP:$src),
+                           "#FP_TO_INT64_IN_MEM PSEUDO!",
+                           [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in  // Operand-less pseudo; Defs lists FP0-FP6.
+  let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+    def FP_REG_KILL  : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
+
 // All FP Stack operations are represented with two instructions here.  The
 // first instruction, generated by the instruction selector, uses "RFP"
 // registers: a traditional register file to reference floating point values.
@@ -379,3 +399,22 @@ def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
                   (ops i16mem:$dst), "fnstcw $dst", []>;
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (ops i16mem:$dst), "fldcw $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 values.
+def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
+
+// Required for CALLs which return f32 / f64 values.
+def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
+def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
+
+// Floating point constants -0.0 and -1.0
+def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
+def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
+
+// Used to convert i64 to f64 since there isn't an SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d5fbd9cc15d..c8f55529a13 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -403,47 +403,6 @@ def IMPLICIT_DEF_R16  : I<0, Pseudo, (ops R16:$dst),
 def IMPLICIT_DEF_R32  : I<0, Pseudo, (ops R32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set R32:$dst, (undef))]>;
-def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
-                         "#IMPLICIT_DEF $dst",
-                         [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
-def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
-                         "#IMPLICIT_DEF $dst",
-                         [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
-
-
-// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
-  def CMOV_FR32 : I<0, Pseudo,
-                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
-                    "#CMOV_FR32 PSEUDO!",
-                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
-  def CMOV_FR64 : I<0, Pseudo,
-                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
-                    "#CMOV_FR64 PSEUDO!",
-                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
-}
-
-let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
-  def FP_TO_INT16_IN_MEM : I<0, Pseudo,
-                            (ops i16mem:$dst, RFP:$src),
-                           "#FP_TO_INT16_IN_MEM PSEUDO!",
-                           [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
-  def FP_TO_INT32_IN_MEM : I<0, Pseudo,
-                            (ops i32mem:$dst, RFP:$src),
-                           "#FP_TO_INT32_IN_MEM PSEUDO!",
-                           [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
-  def FP_TO_INT64_IN_MEM : I<0, Pseudo,
-                            (ops i64mem:$dst, RFP:$src),
-                           "#FP_TO_INT64_IN_MEM PSEUDO!",
-                           [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
-}
-
-
-let isTerminator = 1 in
-  let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
-    def FP_REG_KILL  : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
-
 
 // Nop
 def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
@@ -1690,7 +1649,6 @@ let isTwoAddress = 0 in {
 
 
 // Double shift instructions (generalizations of rotate)
-
 def SHLD32rrCL : I<0xA5, MRMDestReg, (ops R32:$dst, R32:$src1, R32:$src2),
                    "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set R32:$dst, (X86shld R32:$src1, R32:$src2, CL))]>,
@@ -2389,24 +2347,6 @@ def MOV32r0  : I<0x31, MRMInitReg,  (ops R32:$dst),
                  "xor{l} $dst, $dst",
                  [(set R32:$dst, 0)]>;
 
-//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
-//===----------------------------------------------------------------------===//
-
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrMMX.td"
-
-//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrSSE.td"
-
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
@@ -2460,21 +2400,6 @@ def : Pat<(i16 (anyext R8 :$src)), (MOVZX16rr8  R8 :$src)>;
 def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8  R8 :$src)>;
 def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
 
-// Required for RET of f32 / f64 values.
-def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
-def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
-
-// Required for CALL which return f32 / f64 values.
-def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
-def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
-
-// Floating point constant -0.0 and -1.0
-def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
-def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
-
-// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
@@ -2519,3 +2444,22 @@ def : Pat<(or (shl R16:$src1, CL:$amt),
 def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
                      (srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
           (SHLD16mrCL addr:$dst, R16:$src2)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 45f7f8a3c69..7a4f6ead892 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -24,17 +24,6 @@ def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src),
                  "movd {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
 
-def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-
-
 def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
@@ -44,14 +33,3 @@ def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
 def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
-
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 722cfba0719..94e78b191f5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -13,6 +13,337 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
+
+// Some 'special' instructions
+def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
+                         "#IMPLICIT_DEF $dst",
+                         [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
+                         "#IMPLICIT_DEF $dst",
+                         [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
+
+// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
+  def CMOV_FR32 : I<0, Pseudo,
+                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
+                    "#CMOV_FR32 PSEUDO!",
+                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
+  def CMOV_FR64 : I<0, Pseudo,
+                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
+                    "#CMOV_FR64 PSEUDO!",
+                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
+}
+
+// Move Instructions
+def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                "movss {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE1]>, XS;
+def MOVSDrr : I<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                "movsd {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE2]>, XD;
+
+def MOVSSrm : I<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                "movss {$src, $dst|$dst, $src}",
+                [(set FR32:$dst, (loadf32 addr:$src))]>,
+              Requires<[HasSSE1]>, XS;
+def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
+                "movss {$src, $dst|$dst, $src}",
+                [(store FR32:$src, addr:$dst)]>,
+              Requires<[HasSSE1]>, XS;
+def MOVSDrm : I<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                "movsd {$src, $dst|$dst, $src}",
+                [(set FR64:$dst, (loadf64 addr:$src))]>,
+              Requires<[HasSSE2]>, XD;
+def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
+                "movsd {$src, $dst|$dst, $src}",
+                [(store FR64:$src, addr:$dst)]>,
+              Requires<[HasSSE2]>, XD;
+
+// Conversion instructions
+def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
+                   "cvttss2si {$src, $dst|$dst, $src}",
+                   [(set R32:$dst, (fp_to_sint FR32:$src))]>,
+                 Requires<[HasSSE1]>, XS;
+def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
+                   "cvttss2si {$src, $dst|$dst, $src}",
+                   [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>,
+                 Requires<[HasSSE1]>, XS;
+def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
+                   "cvttsd2si {$src, $dst|$dst, $src}",
+                   [(set R32:$dst, (fp_to_sint FR64:$src))]>,
+                 Requires<[HasSSE2]>, XD;
+def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
+                   "cvttsd2si {$src, $dst|$dst, $src}",
+                   [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>,
+                 Requires<[HasSSE2]>, XD;
+def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
+                  "cvtss2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (fextend FR32:$src))]>,
+                Requires<[HasSSE2]>, XS;
+def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
+                  "cvtss2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>,
+                Requires<[HasSSE2]>, XS;
+def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
+                  "cvtsd2ss {$src, $dst|$dst, $src}",
+                  [(set FR32:$dst, (fround FR64:$src))]>,
+                Requires<[HasSSE2]>, XD;
+def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), 
+                  "cvtsd2ss {$src, $dst|$dst, $src}",
+                  [(set FR32:$dst, (fround (loadf64 addr:$src)))]>,
+                Requires<[HasSSE2]>, XD;
+def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
+                  "cvtsi2ss {$src, $dst|$dst, $src}",
+                  [(set FR32:$dst, (sint_to_fp R32:$src))]>,
+                Requires<[HasSSE2]>, XS;
+def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
+                  "cvtsi2ss {$src, $dst|$dst, $src}",
+                  [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
+                Requires<[HasSSE2]>, XS;
+def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
+                  "cvtsi2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (sint_to_fp R32:$src))]>,
+                Requires<[HasSSE2]>, XD;
+def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
+                  "cvtsi2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
+                Requires<[HasSSE2]>, XD;
+
+// Arithmetic instructions
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                "addss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>,
+              Requires<[HasSSE1]>, XS;
+def ADDSDrr : I<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                "addsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>,
+              Requires<[HasSSE2]>, XD;
+def MULSSrr : I<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                "mulss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>,
+              Requires<[HasSSE1]>, XS;
+def MULSDrr : I<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                "mulsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>,
+              Requires<[HasSSE2]>, XD;
+}
+
+def ADDSSrm : I<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                "addss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, XS;
+def ADDSDrm : I<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                "addsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, XD;
+def MULSSrm : I<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                "mulss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, XS;
+def MULSDrm : I<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                "mulsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, XD;
+
+def DIVSSrr : I<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                "divss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>,
+              Requires<[HasSSE1]>, XS;
+def DIVSSrm : I<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                "divss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, XS;
+def DIVSDrr : I<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                "divsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>,
+              Requires<[HasSSE2]>, XD;
+def DIVSDrm : I<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                "divsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, XD;
+
+def SUBSSrr : I<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                "subss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>,
+              Requires<[HasSSE1]>, XS;
+def SUBSSrm : I<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                "subss {$src2, $dst|$dst, $src2}",
+                [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, XS;
+def SUBSDrr : I<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                "subsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>,
+              Requires<[HasSSE2]>, XD;
+def SUBSDrm : I<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                "subsd {$src2, $dst|$dst, $src2}",
+                [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, XD;
+}
+
+def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                 "sqrtss {$src, $dst|$dst, $src}",
+                 [(set FR32:$dst, (fsqrt FR32:$src))]>,
+               Requires<[HasSSE1]>, XS;
+def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                 "sqrtss {$src, $dst|$dst, $src}",
+                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
+               Requires<[HasSSE1]>, XS;
+def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                 "sqrtsd {$src, $dst|$dst, $src}",
+                 [(set FR64:$dst, (fsqrt FR64:$src))]>,
+               Requires<[HasSSE2]>, XD;
+def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                 "sqrtsd {$src, $dst|$dst, $src}",
+                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
+               Requires<[HasSSE2]>, XD;
+
+// Comparison instructions
+let isTwoAddress = 1 in {
+def CMPSSrr : I<0xC2, MRMSrcReg, 
+                (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
+                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE1]>, XS;
+def CMPSSrm : I<0xC2, MRMSrcMem, 
+                (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
+                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE1]>, XS;
+def CMPSDrr : I<0xC2, MRMSrcReg, 
+                (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
+                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE2]>, XD;  // cmpsd is SSE2, same as CMPSDrm.
+def CMPSDrm : I<0xC2, MRMSrcMem, 
+                (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
+                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
+              Requires<[HasSSE2]>, XD;
+}
+
+def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
+                 "ucomiss {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR32:$src1, FR32:$src2)]>,
+               Requires<[HasSSE1]>, TB;
+def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
+                 "ucomiss {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
+               Requires<[HasSSE1]>, TB;
+def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
+                 "ucomisd {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR64:$src1, FR64:$src2)]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
+                 "ucomisd {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Aliases of packed instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
+                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+               Requires<[HasSSE2]>, TB, OpSize;  // pxor on XMM regs is SSE2.
+def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
+                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
+// Upper bits are disregarded.
+def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                   "movaps {$src, $dst|$dst, $src}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                   "movapd {$src, $dst|$dst, $src}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
+// Upper bits are disregarded.
+def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+                   "movaps {$src, $dst|$dst, $src}",
+                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
+                 Requires<[HasSSE1]>, TB;
+def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+                  "movapd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  "andps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
+                Requires<[HasSSE1]>, TB;
+def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  "andpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                 "orps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                 "orpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  "xorps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
+                Requires<[HasSSE1]>, TB;
+def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  "xorpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+}
+def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                  "andps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fand FR32:$src1,
+                                    (X86loadpf32 addr:$src2)))]>,
+                Requires<[HasSSE1]>, TB;
+def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                  "andpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fand FR64:$src1,
+                                    (X86loadpf64 addr:$src2)))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                 "orps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                 "orpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                  "xorps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86fxor FR32:$src1,
+                                    (X86loadpf32 addr:$src2)))]>,
+                Requires<[HasSSE1]>, TB;
+def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                  "xorpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86fxor FR64:$src1,
+                                    (X86loadpf64 addr:$src2)))]>,
+                Requires<[HasSSE2]>, TB, OpSize;
+
+def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE1]>, TB;
+def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
 def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
                 "movaps {$src, $dst|$dst, $src}", []>,
                Requires<[HasSSE1]>, TB;
@@ -100,301 +431,27 @@ def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
                Requires<[HasSSE2]>, TB, OpSize;
 }
 
-def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
-                "movss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def MOVSDrr : I<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
-                "movsd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE2]>, XD;
-
-def MOVSSrm : I<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
-                "movss {$src, $dst|$dst, $src}",
-                [(set FR32:$dst, (loadf32 addr:$src))]>,
-              Requires<[HasSSE1]>, XS;
-def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
-                "movss {$src, $dst|$dst, $src}",
-                [(store FR32:$src, addr:$dst)]>,
-              Requires<[HasSSE1]>, XS;
-def MOVSDrm : I<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
-                "movsd {$src, $dst|$dst, $src}",
-                [(set FR64:$dst, (loadf64 addr:$src))]>,
-              Requires<[HasSSE2]>, XD;
-def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
-                "movsd {$src, $dst|$dst, $src}",
-                [(store FR64:$src, addr:$dst)]>,
-              Requires<[HasSSE2]>, XD;
-
-def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
-                   "cvttss2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint FR32:$src))]>,
-                 Requires<[HasSSE1]>, XS;
-def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
-                   "cvttss2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>,
-                 Requires<[HasSSE1]>, XS;
-def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
-                   "cvttsd2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint FR64:$src))]>,
-                 Requires<[HasSSE2]>, XD;
-def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
-                   "cvttsd2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>,
-                 Requires<[HasSSE2]>, XD;
-def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
-                  "cvtss2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (fextend FR32:$src))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
-                  "cvtss2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
-                  "cvtsd2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (fround FR64:$src))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), 
-                  "cvtsd2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (fround (loadf64 addr:$src)))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
-                  "cvtsi2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (sint_to_fp R32:$src))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
-                  "cvtsi2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
-                  "cvtsi2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (sint_to_fp R32:$src))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
-                  "cvtsi2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XD;
-
-def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
-                 "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt FR32:$src))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
-                 "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
-                 "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt FR64:$src))]>,
-               Requires<[HasSSE2]>, XD;
-def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
-                 "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
-               Requires<[HasSSE2]>, XD;
-
-def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
-                 "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, FR32:$src2)]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
-                 "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
-                 "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, FR64:$src2)]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
-                 "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-let isTwoAddress = 1 in {
-// SSE Scalar Arithmetic
-let isCommutable = 1 in {
-def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                "addss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def ADDSDrr : I<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                "addsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def MULSSrr : I<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                "mulss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def MULSDrr : I<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                "mulsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-}
-
-def ADDSSrm : I<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
-                "addss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def ADDSDrm : I<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
-                "addsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
-def MULSSrm : I<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
-                "mulss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def MULSDrm : I<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
-                "mulsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
-
-def DIVSSrr : I<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                "divss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def DIVSSrm : I<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
-                "divss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def DIVSDrr : I<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                "divsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def DIVSDrm : I<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
-                "divsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
-
-def SUBSSrr : I<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                "subss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def SUBSSrm : I<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
-                "subss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def SUBSDrr : I<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                "subsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def SUBSDrm : I<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
-                "subsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
-
-// SSE compare
-def CMPSSrr : I<0xC2, MRMSrcReg, 
-                (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
-                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def CMPSSrm : I<0xC2, MRMSrcMem, 
-                (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
-                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def CMPSDrr : I<0xC2, MRMSrcReg, 
-                (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
-                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XD;
-def CMPSDrm : I<0xC2, MRMSrcMem, 
-                (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
-                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE2]>, XD;
-}
-
-
 //===----------------------------------------------------------------------===//
-// Alias Instructions
+// SSE integer instructions
 //===----------------------------------------------------------------------===//
 
-// Alias instructions that map fld0 to pxor for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
-                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
-               Requires<[HasSSE1]>, TB, OpSize;
-def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
-                 "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
-               Requires<[HasSSE2]>, TB, OpSize;
+// Move Instructions
+def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>; // R32 -> VR128; no pattern
+def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>; // i32mem -> VR128; no pattern
+def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>; // VR128 -> i32mem (store form); no pattern
 
-// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
-// Upper bits are disregarded.
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
-                   "movaps {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
-                   "movapd {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
-// Upper bits are disregarded.
-def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
-                   "movaps {$src, $dst|$dst, $src}",
-                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
-                  "movapd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                  "andps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                  "andpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                  "xorps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                  "xorpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-}
-def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                  "andps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fand FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                  "andpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fand FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                  "xorps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fxor FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                  "xorpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fxor FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-
-def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-}
+def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>; // NOTE(review): VR64 src w/ 0x7E/XS; confirm
+def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>; // i64mem -> VR128; no pattern
+def MOVQ128mr : I<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>; // VR128 -> i64mem (store form); no pattern