Add *several* AVX integer packed binop instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107225 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-06-29 23:47:49 +00:00
parent 1104645eef
commit 6c9fa43716
3 changed files with 541 additions and 35 deletions

View File

@ -2258,48 +2258,62 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
/// PDI_binop_rm_int - SSE packed-integer binary operator matched through the
/// intrinsic IntId. Expands to a register-register form (rr) and a
/// register-memory form (rm), both built on PDI with opcode opc.
/// Is2Addr chooses the assembly string: when 1 (the default) only $src2 and
/// $dst are printed; when 0 all three operands ($src2, $src1, $dst) are
/// printed. The selection pattern is the same in both cases.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
// rm: the second source is a 128-bit memory operand, loaded through
// memopv2i64 and bitconverted before being handed to the intrinsic.
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64
addr:$src2))))]>;
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
}
/// PDI_binop_rmi_int - SSE packed-integer binary operator with register (rr),
/// memory (rm) and immediate (ri) forms.
/// rr and rm use opcode opc and are matched through intrinsic IntId; ri is a
/// PDIi8 using opcode opc2 with instruction format ImmForm, takes an i32i8imm
/// operand and is matched through IntId2 with an (i32 imm) second operand.
/// Is2Addr chooses the assembly string for all three forms: 1 (the default)
/// prints $src2 and $dst only; 0 prints $src2, $src1 and $dst.
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
// ri: immediate form; note the different opcode (opc2) and intrinsic (IntId2).
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
(ins VR128:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
/// PDI_binop_rm - Simple SSE2 binary operator.
/// Matches the SDNode OpNode with a result of type OpVT and defines a
/// register-register form (rr) and a register-memory form (rm) on opcode opc.
/// In the rm form the memory operand is loaded through memopv2i64 and
/// bitconverted before being passed to OpNode.
/// Is2Addr chooses the assembly string: 1 (the default) prints $src2 and $dst
/// only; 0 prints $src2, $src1 and $dst.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT> {
ValueType OpVT, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
(bitconvert (memopv2i64 addr:$src2)))))]>;
}
@ -2308,22 +2322,82 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
/// Unlike PDI_binop_rm, the result type is fixed to v2i64 in the rr pattern
/// and the rm pattern feeds (memopv2i64 addr:$src2) to OpNode directly, with
/// no bitconvert around the load.
/// Is2Addr chooses the assembly string: 1 (the default) prints $src2 and $dst
/// only; 0 prints $src2, $src1 and $dst.
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode> {
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
(memopv2i64 addr:$src2)))]>;
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
// AVX ("v"-prefixed) instantiations of the packed-integer binop multiclasses.
// Every defm passes 0 as the trailing Is2Addr argument, which selects the
// three-operand assembly string, and is tagged VEX_4V. The whole group is
// marked isAsmParserOnly and predicated on HasAVX and HasSSE2.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
// SDNode-based forms marked commutable (add, mul).
let isCommutable = 1 in {
defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 0 /* 3 addr */>, VEX_4V;
defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 0>, VEX_4V;
defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 0>, VEX_4V;
defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 0>, VEX_4V;
defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 0>, VEX_4V;
}
// sub forms are defined outside the isCommutable block.
defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0>, VEX_4V;
defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0>, VEX_4V;
defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0>, VEX_4V;
defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0>, VEX_4V;
// Intrinsic forms
// The psubs/psubus intrinsics are likewise kept outside the isCommutable block.
defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0>,
VEX_4V;
defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0>,
VEX_4V;
defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0>,
VEX_4V;
defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0>,
VEX_4V;
// The remaining intrinsic forms are all marked commutable.
let isCommutable = 1 in {
defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 0>,
VEX_4V;
defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 0>,
VEX_4V;
defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 0>,
VEX_4V;
defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 0>,
VEX_4V;
defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 0>,
VEX_4V;
defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 0>,
VEX_4V;
defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 0>,
VEX_4V;
defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 0>,
VEX_4V;
defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 0>,
VEX_4V;
defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 0>,
VEX_4V;
defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 0>,
VEX_4V;
defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 0>,
VEX_4V;
defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 0>,
VEX_4V;
defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 0>,
VEX_4V;
defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 0>,
VEX_4V;
}
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in {
defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8>;

View File

@ -10966,3 +10966,219 @@
// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
vstmxcsr 0xdeadbeef
// CHECK: vpsubb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
vpsubb %xmm1, %xmm2, %xmm3
// CHECK: vpsubb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
vpsubb (%eax), %xmm2, %xmm3
// CHECK: vpsubw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
vpsubw %xmm1, %xmm2, %xmm3
// CHECK: vpsubw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
vpsubw (%eax), %xmm2, %xmm3
// CHECK: vpsubd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
vpsubd %xmm1, %xmm2, %xmm3
// CHECK: vpsubd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
vpsubd (%eax), %xmm2, %xmm3
// CHECK: vpsubq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
vpsubq %xmm1, %xmm2, %xmm3
// CHECK: vpsubq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
vpsubq (%eax), %xmm2, %xmm3
// CHECK: vpsubsb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
vpsubsb %xmm1, %xmm2, %xmm3
// CHECK: vpsubsb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
vpsubsb (%eax), %xmm2, %xmm3
// CHECK: vpsubsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
vpsubsw %xmm1, %xmm2, %xmm3
// CHECK: vpsubsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
vpsubsw (%eax), %xmm2, %xmm3
// CHECK: vpsubusb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
vpsubusb %xmm1, %xmm2, %xmm3
// CHECK: vpsubusb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
vpsubusb (%eax), %xmm2, %xmm3
// CHECK: vpsubusw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
vpsubusw %xmm1, %xmm2, %xmm3
// CHECK: vpsubusw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
vpsubusw (%eax), %xmm2, %xmm3
// CHECK: vpaddb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
vpaddb %xmm1, %xmm2, %xmm3
// CHECK: vpaddb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
vpaddb (%eax), %xmm2, %xmm3
// CHECK: vpaddw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
vpaddw %xmm1, %xmm2, %xmm3
// CHECK: vpaddw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
vpaddw (%eax), %xmm2, %xmm3
// CHECK: vpaddd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
vpaddd %xmm1, %xmm2, %xmm3
// CHECK: vpaddd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
vpaddd (%eax), %xmm2, %xmm3
// CHECK: vpaddq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
vpaddq %xmm1, %xmm2, %xmm3
// CHECK: vpaddq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
vpaddq (%eax), %xmm2, %xmm3
// CHECK: vpaddsb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
vpaddsb %xmm1, %xmm2, %xmm3
// CHECK: vpaddsb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
vpaddsb (%eax), %xmm2, %xmm3
// CHECK: vpaddsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
vpaddsw %xmm1, %xmm2, %xmm3
// CHECK: vpaddsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
vpaddsw (%eax), %xmm2, %xmm3
// CHECK: vpaddusb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
vpaddusb %xmm1, %xmm2, %xmm3
// CHECK: vpaddusb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
vpaddusb (%eax), %xmm2, %xmm3
// CHECK: vpaddusw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
vpaddusw %xmm1, %xmm2, %xmm3
// CHECK: vpaddusw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
vpaddusw (%eax), %xmm2, %xmm3
// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
vpmulhuw %xmm1, %xmm2, %xmm3
// CHECK: vpmulhuw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
vpmulhuw (%eax), %xmm2, %xmm3
// CHECK: vpmulhw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
vpmulhw %xmm1, %xmm2, %xmm3
// CHECK: vpmulhw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
vpmulhw (%eax), %xmm2, %xmm3
// CHECK: vpmullw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
vpmullw %xmm1, %xmm2, %xmm3
// CHECK: vpmullw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
vpmullw (%eax), %xmm2, %xmm3
// CHECK: vpmuludq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
vpmuludq %xmm1, %xmm2, %xmm3
// CHECK: vpmuludq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
vpmuludq (%eax), %xmm2, %xmm3
// vpmaddwd (opcode 0xF5) is defined with the same VEX_4V multiclass as its
// neighbours; cover its register and memory forms as well.
// CHECK: vpmaddwd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf5,0xd9]
vpmaddwd %xmm1, %xmm2, %xmm3
// CHECK: vpmaddwd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf5,0x18]
vpmaddwd (%eax), %xmm2, %xmm3
// CHECK: vpavgb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
vpavgb %xmm1, %xmm2, %xmm3
// CHECK: vpavgb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
vpavgb (%eax), %xmm2, %xmm3
// CHECK: vpavgw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
vpavgw %xmm1, %xmm2, %xmm3
// CHECK: vpavgw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
vpavgw (%eax), %xmm2, %xmm3
// CHECK: vpminsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
vpminsw %xmm1, %xmm2, %xmm3
// CHECK: vpminsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
vpminsw (%eax), %xmm2, %xmm3
// CHECK: vpminub %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
vpminub %xmm1, %xmm2, %xmm3
// CHECK: vpminub (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
vpminub (%eax), %xmm2, %xmm3
// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
vpmaxsw %xmm1, %xmm2, %xmm3
// CHECK: vpmaxsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
vpmaxsw (%eax), %xmm2, %xmm3
// CHECK: vpmaxub %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
vpmaxub %xmm1, %xmm2, %xmm3
// CHECK: vpmaxub (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
vpmaxub (%eax), %xmm2, %xmm3
// CHECK: vpsadbw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
vpsadbw %xmm1, %xmm2, %xmm3
// CHECK: vpsadbw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
vpsadbw (%eax), %xmm2, %xmm3

View File

@ -1006,3 +1006,219 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
vstmxcsr -4(%rsp)
// CHECK: vpsubb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
vpsubb %xmm11, %xmm12, %xmm13
// CHECK: vpsubb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
vpsubb (%rax), %xmm12, %xmm13
// CHECK: vpsubw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
vpsubw %xmm11, %xmm12, %xmm13
// CHECK: vpsubw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
vpsubw (%rax), %xmm12, %xmm13
// CHECK: vpsubd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
vpsubd %xmm11, %xmm12, %xmm13
// CHECK: vpsubd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
vpsubd (%rax), %xmm12, %xmm13
// CHECK: vpsubq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
vpsubq %xmm11, %xmm12, %xmm13
// CHECK: vpsubq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
vpsubq (%rax), %xmm12, %xmm13
// CHECK: vpsubsb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
vpsubsb %xmm11, %xmm12, %xmm13
// CHECK: vpsubsb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
vpsubsb (%rax), %xmm12, %xmm13
// CHECK: vpsubsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
vpsubsw %xmm11, %xmm12, %xmm13
// CHECK: vpsubsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
vpsubsw (%rax), %xmm12, %xmm13
// CHECK: vpsubusb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
vpsubusb %xmm11, %xmm12, %xmm13
// CHECK: vpsubusb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
vpsubusb (%rax), %xmm12, %xmm13
// CHECK: vpsubusw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
vpsubusw %xmm11, %xmm12, %xmm13
// CHECK: vpsubusw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
vpsubusw (%rax), %xmm12, %xmm13
// CHECK: vpaddb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
vpaddb %xmm11, %xmm12, %xmm13
// CHECK: vpaddb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
vpaddb (%rax), %xmm12, %xmm13
// CHECK: vpaddw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
vpaddw %xmm11, %xmm12, %xmm13
// CHECK: vpaddw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
vpaddw (%rax), %xmm12, %xmm13
// CHECK: vpaddd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
vpaddd %xmm11, %xmm12, %xmm13
// CHECK: vpaddd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
vpaddd (%rax), %xmm12, %xmm13
// CHECK: vpaddq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
vpaddq %xmm11, %xmm12, %xmm13
// CHECK: vpaddq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
vpaddq (%rax), %xmm12, %xmm13
// CHECK: vpaddsb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
vpaddsb %xmm11, %xmm12, %xmm13
// CHECK: vpaddsb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xec,0x28]
vpaddsb (%rax), %xmm12, %xmm13
// CHECK: vpaddsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
vpaddsw %xmm11, %xmm12, %xmm13
// CHECK: vpaddsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xed,0x28]
vpaddsw (%rax), %xmm12, %xmm13
// CHECK: vpaddusb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
vpaddusb %xmm11, %xmm12, %xmm13
// CHECK: vpaddusb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
vpaddusb (%rax), %xmm12, %xmm13
// CHECK: vpaddusw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
vpaddusw %xmm11, %xmm12, %xmm13
// CHECK: vpaddusw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
vpaddusw (%rax), %xmm12, %xmm13
// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
vpmulhuw %xmm11, %xmm12, %xmm13
// CHECK: vpmulhuw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
vpmulhuw (%rax), %xmm12, %xmm13
// CHECK: vpmulhw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
vpmulhw %xmm11, %xmm12, %xmm13
// CHECK: vpmulhw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
vpmulhw (%rax), %xmm12, %xmm13
// CHECK: vpmullw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
vpmullw %xmm11, %xmm12, %xmm13
// CHECK: vpmullw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
vpmullw (%rax), %xmm12, %xmm13
// CHECK: vpmuludq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
vpmuludq %xmm11, %xmm12, %xmm13
// CHECK: vpmuludq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
vpmuludq (%rax), %xmm12, %xmm13
// vpmaddwd (opcode 0xF5) is defined with the same VEX_4V multiclass as its
// neighbours; cover its register and memory forms as well.
// CHECK: vpmaddwd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf5,0xeb]
vpmaddwd %xmm11, %xmm12, %xmm13
// CHECK: vpmaddwd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf5,0x28]
vpmaddwd (%rax), %xmm12, %xmm13
// CHECK: vpavgb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
vpavgb %xmm11, %xmm12, %xmm13
// CHECK: vpavgb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
vpavgb (%rax), %xmm12, %xmm13
// CHECK: vpavgw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
vpavgw %xmm11, %xmm12, %xmm13
// CHECK: vpavgw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
vpavgw (%rax), %xmm12, %xmm13
// CHECK: vpminsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
vpminsw %xmm11, %xmm12, %xmm13
// CHECK: vpminsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xea,0x28]
vpminsw (%rax), %xmm12, %xmm13
// CHECK: vpminub %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
vpminub %xmm11, %xmm12, %xmm13
// CHECK: vpminub (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xda,0x28]
vpminub (%rax), %xmm12, %xmm13
// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
vpmaxsw %xmm11, %xmm12, %xmm13
// CHECK: vpmaxsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xee,0x28]
vpmaxsw (%rax), %xmm12, %xmm13
// CHECK: vpmaxub %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
vpmaxub %xmm11, %xmm12, %xmm13
// CHECK: vpmaxub (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xde,0x28]
vpmaxub (%rax), %xmm12, %xmm13
// CHECK: vpsadbw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
vpsadbw %xmm11, %xmm12, %xmm13
// CHECK: vpsadbw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
vpsadbw (%rax), %xmm12, %xmm13