More AVX: {ADD,SUB,MUL,DIV}{PD,PS}rr

Handle OpSize TSFlag for AVX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105869 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-12 07:37:34 +00:00 · 2010-06-12 01:23:26 +00:00 · 2010-06-12 01:23:26 +00:00 · 7be0d2c8e9
commit 7be0d2c8e9
parent 729aab3dd3
5 changed files with 98 additions and 1 deletions
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -220,6 +220,7 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
 //   PSI   - SSE1 instructions with TB prefix.
 //   PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
 //   VSSI  - SSE1 instructions with XS prefix in AVX form.
+//   VPSI  - SSE1 instructions with TB prefix in AVX form.

 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
      : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
@@ -237,6 +238,10 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
        Requires<[HasAVX, HasSSE1]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+        VEX_4V, Requires<[HasAVX, HasSSE1]>;

 // SSE2 Instruction Templates:
 // 
@@ -246,6 +251,7 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   PDI    - SSE2 instructions with TB and OpSize prefixes.
 //   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
 //   VSDI   - SSE2 instructions with XD prefix in AVX form.
+//   VPDI   - SSE2 instructions with TB and OpSize prefixes in AVX form.

 class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
      : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
@@ -266,6 +272,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
        Requires<[HasAVX, HasSSE2]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+        VEX_4V, OpSize, Requires<[HasAVX, HasSSE2]>;

 // SSE3 Instruction Templates:
 // 
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -737,6 +737,26 @@ multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
    let isCommutable = Commutable;
  }

+  def V#NAME#PSrr : VPSI<opc, MRMSrcReg, (outs VR128:$dst),
+               (ins VR128:$src1, VR128:$src2),
+               !strconcat(OpcodeStr,
+                          "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+               []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
+  def V#NAME#PDrr : VPDI<opc, MRMSrcReg, (outs VR128:$dst),
+               (ins VR128:$src1, VR128:$src2),
+               !strconcat(OpcodeStr,
+                          "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+               []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
                                 (ins VR128:$src1, f128mem:$src2),
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -392,12 +392,16 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
  // functionality of a SIMD prefix
  //
  //  0b00: None
-  //  0b01: 66 (not handled yet)
+  //  0b01: 66
  //  0b10: F3
  //  0b11: F2
  //
  unsigned char VEX_PP = 0;

+  // Encode the operand size opcode prefix as needed.
+  if (TSFlags & X86II::OpSize)
+    VEX_PP = 0x01;
+
  switch (TSFlags & X86II::Op0Mask) {
  default: assert(0 && "Invalid prefix!");
  case 0: break;  // No prefix!
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -10117,3 +10117,34 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK:  encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
          vdivsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5

+// CHECK: vaddps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x58,0xd4]
+          vaddps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4]
+          vsubps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x59,0xd4]
+          vmulps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4]
+          vdivps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x58,0xd4]
+          vaddpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4]
+          vsubpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x59,0xd4]
+          vmulpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4]
+          vdivpd  %xmm4, %xmm6, %xmm2
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -167,3 +167,35 @@ vmulsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK:   vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK:   encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc]
 vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa]
+vaddps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa]
+vsubps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa]
+vmulps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa]
+vdivps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vaddpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa]
+vaddpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa]
+vsubpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa]
+vmulpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa]
+vdivpd  %xmm10, %xmm11, %xmm15