Add 256-bit vaddsub, vhadd, vhsub, vblend and vdpp instructions!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108769 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-07-19 23:32:44 +00:00
parent 58b8176ed3
commit 94143ee625
9 changed files with 273 additions and 81 deletions

View File

@ -56,6 +56,9 @@ public:
// Printer hook for the i128mem operand: forwards to printMemReference and
// writes nothing else to O (no size keyword in front of the reference).
void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
// Printer hook for the i256mem operand: same behaviour as the neighbouring
// hooks, i.e. it only forwards to printMemReference.
void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
// Printer hook for the f32mem operand: forwards to printMemReference without
// emitting anything of its own.
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}

View File

@ -64,6 +64,10 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
// Printer hook for the i256mem operand: writes the "YMMWORD PTR " size
// keyword to O first, then the memory reference itself.
void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "YMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "DWORD PTR ";
printMemReference(MI, OpNo, O);

View File

@ -213,6 +213,7 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
// Typed memop fragments: each one matches (memop $ptr) and gives the result
// the vector type spelled in the fragment name.
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
// FIXME: move this to a more appropriate place after all AVX is done.
// 256-bit vector flavours (32 x i8, 8 x f32, 4 x f64).
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;

View File

@ -222,7 +222,7 @@ def i16mem : X86MemOperand<"printi16mem">;
// Memory operand definitions. The string handed to X86MemOperand matches the
// name of an instruction-printer method (e.g. printi256mem); presumably it
// selects the method used to print the operand -- X86MemOperand itself is not
// visible here, so confirm against its definition.
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
//def i256mem : X86MemOperand<"printi256mem">;
def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;

View File

@ -3125,35 +3125,41 @@ let AddedComplexity = 20 in
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
(outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src2))]>;
[(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
def rm : I<0xD0, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (Int VR128:$src1,
(memop addr:$src2)))]>;
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
// AVX forms of ADDSUBPS/ADDSUBPD: VEX_4V encoded, instantiated with
// Is2Addr = 0 so the three-operand assembly string is used.
// NOTE(review): ExeDomain is SSEPackedDouble for the PS variants as well as
// the PD ones -- confirm the PS forms should not be in the single-precision
// domain.
let isAsmParserOnly = 1, Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
VEX_4V;
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
VEX_4V;
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
f128mem, 0>, XD, VEX_4V;
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
f128mem, 0>, OpSize, VEX_4V;
// 256-bit (VR256 / f256mem) variants. Pattern is overridden with an empty
// list here; presumably because the intrinsics passed in are the 128-bit
// ones and only exist to satisfy the multiclass signature -- confirm.
let Pattern = []<dag> in {
defm VADDSUBPSY : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR256,
f256mem, 0>, XD, VEX_4V;
defm VADDSUBPDY : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR256,
f256mem, 0>, OpSize, VEX_4V;
}
}
// SSE3 forms of ADDSUBPS/ADDSUBPD: $src1 is tied to $dst and the default
// Is2Addr = 1 two-operand assembly string is used.
// NOTE(review): ExeDomain is SSEPackedDouble for ADDSUBPS as well as
// ADDSUBPD -- confirm the PS form should not be in the single-precision
// domain.
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
f128mem>, XD;
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
f128mem>, TB, OpSize;
}
//===---------------------------------------------------------------------===//
@ -3161,51 +3167,65 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
//===---------------------------------------------------------------------===//
// Horizontal ops
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
[(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
[(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
int_x86_sse3_hadd_pd, 0>, VEX_4V;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
int_x86_sse3_hsub_ps, 0>, VEX_4V;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
int_x86_sse3_hsub_pd, 0>, VEX_4V;
let Pattern = []<dag> in {
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
int_x86_sse3_hadd_pd, 0>, VEX_4V;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
int_x86_sse3_hsub_ps, 0>, VEX_4V;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
int_x86_sse3_hsub_pd, 0>, VEX_4V;
}
}
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps>;
defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
int_x86_sse3_hadd_pd>;
defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
int_x86_sse3_hsub_ps>;
defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
int_x86_sse3_hsub_pd>;
}
//===---------------------------------------------------------------------===//
@ -4431,79 +4451,98 @@ let Constraints = "$src1 = $dst" in
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Is2Addr = 1> {
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
[(set RC:$dst,
(IntId RC:$src1,
(bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
let Pattern = []<dag> in {
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
VR256, memopv32i8, i256mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
0>, VEX_4V;
VR128, memopv16i8, i128mem, 0>, VEX_4V;
let Pattern = []<dag> in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem>;
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
VR128, memopv16i8, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
VR128, memopv16i8, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
VR128, memopv16i8, i128mem>;
}
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv16i8, i128mem>;
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop> {
def rr : I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
def rm : I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
}
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem>;
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem>;
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {

View File

@ -804,7 +804,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}];
}
def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {

View File

@ -12926,3 +12926,75 @@
// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
vcmptrue_usps %ymm1, %ymm2, %ymm3
// CHECK: vaddsubps %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xef,0xd0,0xd9]
vaddsubps %ymm1, %ymm2, %ymm3
// CHECK: vaddsubps (%eax), %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xf7,0xd0,0x10]
vaddsubps (%eax), %ymm1, %ymm2
// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xed,0xd0,0xd9]
vaddsubpd %ymm1, %ymm2, %ymm3
// CHECK: vaddsubpd (%eax), %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xf5,0xd0,0x10]
vaddsubpd (%eax), %ymm1, %ymm2
// CHECK: vhaddps %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xef,0x7c,0xd9]
vhaddps %ymm1, %ymm2, %ymm3
// CHECK: vhaddps (%eax), %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xef,0x7c,0x18]
vhaddps (%eax), %ymm2, %ymm3
// CHECK: vhaddpd %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xed,0x7c,0xd9]
vhaddpd %ymm1, %ymm2, %ymm3
// CHECK: vhaddpd (%eax), %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xed,0x7c,0x18]
vhaddpd (%eax), %ymm2, %ymm3
// CHECK: vhsubps %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xef,0x7d,0xd9]
vhsubps %ymm1, %ymm2, %ymm3
// CHECK: vhsubps (%eax), %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xef,0x7d,0x18]
vhsubps (%eax), %ymm2, %ymm3
// CHECK: vhsubpd %ymm1, %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xed,0x7d,0xd9]
vhsubpd %ymm1, %ymm2, %ymm3
// CHECK: vhsubpd (%eax), %ymm2, %ymm3
// CHECK: encoding: [0xc5,0xed,0x7d,0x18]
vhsubpd (%eax), %ymm2, %ymm3
// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03]
vblendps $3, %ymm2, %ymm5, %ymm1
// CHECK: vblendps $3, (%eax), %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03]
vblendps $3, (%eax), %ymm5, %ymm1
// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03]
vblendpd $3, %ymm2, %ymm5, %ymm1
// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03]
vblendpd $3, (%eax), %ymm5, %ymm1
// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03]
vdpps $3, %ymm2, %ymm5, %ymm1
// CHECK: vdpps $3, (%eax), %ymm5, %ymm1
// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03]
vdpps $3, (%eax), %ymm5, %ymm1

View File

@ -3000,3 +3000,75 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
vcmptrue_usps %ymm11, %ymm12, %ymm13
// CHECK: vaddsubps %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb]
vaddsubps %ymm11, %ymm12, %ymm13
// CHECK: vaddsubps (%rax), %ymm11, %ymm12
// CHECK: encoding: [0xc5,0x27,0xd0,0x20]
vaddsubps (%rax), %ymm11, %ymm12
// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb]
vaddsubpd %ymm11, %ymm12, %ymm13
// CHECK: vaddsubpd (%rax), %ymm11, %ymm12
// CHECK: encoding: [0xc5,0x25,0xd0,0x20]
vaddsubpd (%rax), %ymm11, %ymm12
// CHECK: vhaddps %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb]
vhaddps %ymm11, %ymm12, %ymm13
// CHECK: vhaddps (%rax), %ymm12, %ymm13
// CHECK: encoding: [0xc5,0x1f,0x7c,0x28]
vhaddps (%rax), %ymm12, %ymm13
// CHECK: vhaddpd %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb]
vhaddpd %ymm11, %ymm12, %ymm13
// CHECK: vhaddpd (%rax), %ymm12, %ymm13
// CHECK: encoding: [0xc5,0x1d,0x7c,0x28]
vhaddpd (%rax), %ymm12, %ymm13
// CHECK: vhsubps %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb]
vhsubps %ymm11, %ymm12, %ymm13
// CHECK: vhsubps (%rax), %ymm12, %ymm13
// CHECK: encoding: [0xc5,0x1f,0x7d,0x28]
vhsubps (%rax), %ymm12, %ymm13
// CHECK: vhsubpd %ymm11, %ymm12, %ymm13
// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb]
vhsubpd %ymm11, %ymm12, %ymm13
// CHECK: vhsubpd (%rax), %ymm12, %ymm13
// CHECK: encoding: [0xc5,0x1d,0x7d,0x28]
vhsubpd (%rax), %ymm12, %ymm13
// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03]
vblendps $3, %ymm12, %ymm10, %ymm11
// CHECK: vblendps $3, (%rax), %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03]
vblendps $3, (%rax), %ymm10, %ymm11
// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03]
vblendpd $3, %ymm12, %ymm10, %ymm11
// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03]
vblendpd $3, (%rax), %ymm10, %ymm11
// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03]
vdpps $3, %ymm12, %ymm10, %ymm11
// CHECK: vdpps $3, (%rax), %ymm10, %ymm11
// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03]
vdpps $3, (%rax), %ymm10, %ymm11

View File

@ -339,6 +339,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
MEM("f80mem");
MEM("opaque80mem");
MEM("i128mem");
MEM("i256mem");
MEM("f128mem");
MEM("f256mem");
MEM("opaque512mem");