From 164aa8c3fe3ab9039dfa28bd6246cd736dc9dcf0 Mon Sep 17 00:00:00 2001 From: kanjitalk755 Date: Wed, 5 May 2021 23:18:15 +0900 Subject: [PATCH] copy codegen_x86.h from BasiliskII --- .../src/kpx_cpu/src/cpu/jit/x86/codegen_x86.h | 2565 +++++++++++++++++ 1 file changed, 2565 insertions(+) create mode 100644 SheepShaver/src/kpx_cpu/src/cpu/jit/x86/codegen_x86.h diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/x86/codegen_x86.h b/SheepShaver/src/kpx_cpu/src/cpu/jit/x86/codegen_x86.h new file mode 100644 index 00000000..08538b7a --- /dev/null +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/x86/codegen_x86.h @@ -0,0 +1,2565 @@ +/******************** -*- mode: C; tab-width: 8 -*- ******************** + * + * Run-time assembler for IA-32 and AMD64 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * This file is derived from CCG. + * + * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta + * + * Adaptations and enhancements for AMD64 support, Copyright 2003-2008 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************/ + +#ifndef X86_RTASM_H +#define X86_RTASM_H + +/* NOTES + * + * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-) + * + * TODO + * + * o Fix FIXMEs + * o SSE instructions + * o Optimize for cases where register numbers are not integral constants + */ + +/* --- Configuration ------------------------------------------------------- */ + +/* Define to settle a "flat" register set, i.e. different regno for + each size variant. */ +#ifndef X86_FLAT_REGISTERS +#define X86_FLAT_REGISTERS 1 +#endif + +/* Define to generate x86-64 code. */ +#ifndef X86_TARGET_64BIT +#define X86_TARGET_64BIT 0 +#endif + +/* Define to optimize ALU instructions. */ +#ifndef X86_OPTIMIZE_ALU +#define X86_OPTIMIZE_ALU 1 +#endif + +/* Define to optimize rotate/shift instructions. */ +#ifndef X86_OPTIMIZE_ROTSHI +#define X86_OPTIMIZE_ROTSHI 1 +#endif + +/* Define to optimize absolute addresses for RIP relative addressing. */ +#ifndef X86_RIP_RELATIVE_ADDR +#define X86_RIP_RELATIVE_ADDR 1 +#endif + + +/* --- Macros -------------------------------------------------------------- */ + +/* Functions used to emit code. + * + * x86_emit_byte(B) + * x86_emit_word(W) + * x86_emit_long(L) + */ + +/* Get pointer to current code + * + * x86_get_target() + */ + +/* Abort assembler, fatal failure. 
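+ *
+ * None of these hooks is defined in this header; the including code is
+ * expected to supply them.  As a rough illustration only (the buffer name
+ * jit_pc below is an assumption made for this sketch, not something the
+ * original sources mandate), a minimal little-endian setup could be:
+ *
+ *	static unsigned char *jit_pc;             current output position
+ *	#define x86_get_target()      (jit_pc)
+ *	#define x86_emit_byte(B)      (*jit_pc++ = (unsigned char)(B))
+ *	#define x86_emit_word(W)      (x86_emit_byte(W), x86_emit_byte((W) >> 8))
+ *	#define x86_emit_long(L)      (x86_emit_word(L), x86_emit_word((L) >> 16))
+ *	#define x86_emit_failure(MSG) ((void)(MSG), abort())
+ *
+ * (x86_emit_quad() must be provided as well when 64-bit immediates are used.)
+ *
+ * The failure hook itself: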
+ * + * x86_emit_failure(MSG) + */ + +#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0) + + +/* --- Register set -------------------------------------------------------- */ + +enum { + X86_RIP = -2, +#if X86_FLAT_REGISTERS + X86_NOREG = 0, + X86_Reg8L_Base = 0x10, + X86_Reg8H_Base = 0x20, + X86_Reg16_Base = 0x30, + X86_Reg32_Base = 0x40, + X86_Reg64_Base = 0x50, + X86_RegMMX_Base = 0x60, + X86_RegXMM_Base = 0x70, + X86_RegFPU_Base = 0x80 +#else + X86_NOREG = -1, + X86_Reg8L_Base = 0, + X86_Reg8H_Base = 16, + X86_Reg16_Base = 0, + X86_Reg32_Base = 0, + X86_Reg64_Base = 0, + X86_RegMMX_Base = 0, + X86_RegXMM_Base = 0, + X86_RegFPU_Base = 0 +#endif +}; + +enum { + X86_AL = X86_Reg8L_Base, + X86_CL, X86_DL, X86_BL, + X86_SPL, X86_BPL, X86_SIL, X86_DIL, + X86_R8B, X86_R9B, X86_R10B, X86_R11B, + X86_R12B, X86_R13B, X86_R14B, X86_R15B, + X86_AH = X86_Reg8H_Base + 4, + X86_CH, X86_DH, X86_BH +}; + +enum { + X86_AX = X86_Reg16_Base, + X86_CX, X86_DX, X86_BX, + X86_SP, X86_BP, X86_SI, X86_DI, + X86_R8W, X86_R9W, X86_R10W, X86_R11W, + X86_R12W, X86_R13W, X86_R14W, X86_R15W +}; + +enum { + X86_EAX = X86_Reg32_Base, + X86_ECX, X86_EDX, X86_EBX, + X86_ESP, X86_EBP, X86_ESI, X86_EDI, + X86_R8D, X86_R9D, X86_R10D, X86_R11D, + X86_R12D, X86_R13D, X86_R14D, X86_R15D +}; + +enum { + X86_RAX = X86_Reg64_Base, + X86_RCX, X86_RDX, X86_RBX, + X86_RSP, X86_RBP, X86_RSI, X86_RDI, + X86_R8, X86_R9, X86_R10, X86_R11, + X86_R12, X86_R13, X86_R14, X86_R15 +}; + +enum { + X86_MM0 = X86_RegMMX_Base, + X86_MM1, X86_MM2, X86_MM3, + X86_MM4, X86_MM5, X86_MM6, X86_MM7, +}; + +enum { + X86_XMM0 = X86_RegXMM_Base, + X86_XMM1, X86_XMM2, X86_XMM3, + X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7, + X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11, + X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15 +}; + +enum { + X86_ST0 = X86_RegFPU_Base, + X86_ST1, X86_ST2, X86_ST3, + X86_ST4, X86_ST5, X86_ST6, X86_ST7 +}; + +/* Register control and access + * + * _r0P(R) Null register? + * _rIP(R) RIP register? + * _rXP(R) Extended register? + * + * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS) + * _rR(R) Full register number + * _rN(R) Short register number for encoding + * + * _r1(R) 8-bit register ID + * _r2(R) 16-bit register ID + * _r4(R) 32-bit register ID + * _r8(R) 64-bit register ID + * _rM(R) MMX register ID + * _rX(R) XMM register ID + * _rF(R) FPU register ID + * _rA(R) Address register ID used for EA calculation + */ + +#define _rST0P(R) ((int)(R) == (int)X86_ST0) +#define _r0P(R) ((int)(R) == (int)X86_NOREG) +#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0) + +#if X86_FLAT_REGISTERS +#define _rC(R) ((R) & 0xf0) +#define _rR(R) ((R) & 0x0f) +#define _rN(R) ((R) & 0x07) +#define _rXP(R) (((R) > 0 && _rR(R) > 7) ? 1 : 0) +#else +#define _rN(R) ((R) & 0x07) +#define _rR(R) (int(R)) +#define _rXP(R) ((_rR(R) > 7 && _rR(R) < 16) ? 1 : 0) +#endif + +#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS +#define _r1(R) _rN(R) +#define _r2(R) _rN(R) +#define _r4(R) _rN(R) +#define _r8(R) _rN(R) +#define _rA(R) _rN(R) +#define _rM(R) _rN(R) +#define _rX(R) _rN(R) +#define _rF(R) _rN(R) +#else +#define _r1(R) ( ((_rC(R) & (X86_Reg8L_Base | X86_Reg8H_Base)) != 0) ? _rN(R) : x86_emit_failure0( "8-bit register required")) +#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required")) +#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required")) +#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? 
_rN(R) : x86_emit_failure0("64-bit register required")) +#define _rA(R) ( X86_TARGET_64BIT ? \ + ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \ + ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) ) +#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required")) +#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required")) +#define _rF(R) ( (_rC(R) == X86_RegFPU_Base) ? _rN(R) : x86_emit_failure0("FPU register required")) +#endif + +#define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP) +#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL) +#define _rbpP(R) (_rR(R) == _rR(X86_RBP)) +#define _rspP(R) (_rR(R) == _rR(X86_RSP)) +#define _rbp13P(R) (_rN(R) == _rN(X86_RBP)) +#define _rsp12P(R) (_rN(R) == _rN(X86_RSP)) + + +/* ========================================================================= */ +/* --- UTILITY ------------------------------------------------------------- */ +/* ========================================================================= */ + +typedef signed char _sc; +typedef unsigned char _uc; +typedef signed short _ss; +typedef unsigned short _us; +typedef signed int _sl; +typedef unsigned int _ul; + +#define _UC(X) ((_uc )(unsigned long)(X)) +#define _US(X) ((_us )(unsigned long)(X)) +#define _SL(X) ((_sl )(unsigned long)(X)) +#define _UL(X) ((_ul )(unsigned long)(X)) + +#define _PUC(X) ((_uc *)(X)) +#define _PUS(X) ((_us *)(X)) +#define _PSL(X) ((_sl *)(X)) +#define _PUL(X) ((_ul *)(X)) + +#define _B(B) x86_emit_byte((B)) +#define _W(W) x86_emit_word((W)) +#define _L(L) x86_emit_long((L)) +#define _Q(Q) x86_emit_quad((Q)) + +#define _MASK(N) ((unsigned)((1<<(N)))-1) +#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) +#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) +#define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) + +#ifndef _ASM_SAFETY +#define _ck_s(W,I) (_UL(I) & _MASK(W)) +#define _ck_u(W,I) (_UL(I) & _MASK(W)) +#define _ck_su(W,I) (_UL(I) & _MASK(W)) +#define _ck_d(W,I) (_UL(I) & _MASK(W)) +#else +#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field")) +#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field")) +#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field")) +#define _ck_d(W,I) (_siP(W,I) ? 
(_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field")) +#endif + +#define _s0P(I) ((I)==0) +#define _s8P(I) _siP(8,I) +#define _s16P(I) _siP(16,I) +#define _u8P(I) _uiP(8,I) +#define _u16P(I) _uiP(16,I) + +#define _su8(I) _ck_su(8,I) +#define _su16(I) _ck_su(16,I) + +#define _s1(I) _ck_s( 1,I) +#define _s2(I) _ck_s( 2,I) +#define _s3(I) _ck_s( 3,I) +#define _s4(I) _ck_s( 4,I) +#define _s5(I) _ck_s( 5,I) +#define _s6(I) _ck_s( 6,I) +#define _s7(I) _ck_s( 7,I) +#define _s8(I) _ck_s( 8,I) +#define _s9(I) _ck_s( 9,I) +#define _s10(I) _ck_s(10,I) +#define _s11(I) _ck_s(11,I) +#define _s12(I) _ck_s(12,I) +#define _s13(I) _ck_s(13,I) +#define _s14(I) _ck_s(14,I) +#define _s15(I) _ck_s(15,I) +#define _s16(I) _ck_s(16,I) +#define _s17(I) _ck_s(17,I) +#define _s18(I) _ck_s(18,I) +#define _s19(I) _ck_s(19,I) +#define _s20(I) _ck_s(20,I) +#define _s21(I) _ck_s(21,I) +#define _s22(I) _ck_s(22,I) +#define _s23(I) _ck_s(23,I) +#define _s24(I) _ck_s(24,I) +#define _s25(I) _ck_s(25,I) +#define _s26(I) _ck_s(26,I) +#define _s27(I) _ck_s(27,I) +#define _s28(I) _ck_s(28,I) +#define _s29(I) _ck_s(29,I) +#define _s30(I) _ck_s(30,I) +#define _s31(I) _ck_s(31,I) +#define _u1(I) _ck_u( 1,I) +#define _u2(I) _ck_u( 2,I) +#define _u3(I) _ck_u( 3,I) +#define _u4(I) _ck_u( 4,I) +#define _u5(I) _ck_u( 5,I) +#define _u6(I) _ck_u( 6,I) +#define _u7(I) _ck_u( 7,I) +#define _u8(I) _ck_u( 8,I) +#define _u9(I) _ck_u( 9,I) +#define _u10(I) _ck_u(10,I) +#define _u11(I) _ck_u(11,I) +#define _u12(I) _ck_u(12,I) +#define _u13(I) _ck_u(13,I) +#define _u14(I) _ck_u(14,I) +#define _u15(I) _ck_u(15,I) +#define _u16(I) _ck_u(16,I) +#define _u17(I) _ck_u(17,I) +#define _u18(I) _ck_u(18,I) +#define _u19(I) _ck_u(19,I) +#define _u20(I) _ck_u(20,I) +#define _u21(I) _ck_u(21,I) +#define _u22(I) _ck_u(22,I) +#define _u23(I) _ck_u(23,I) +#define _u24(I) _ck_u(24,I) +#define _u25(I) _ck_u(25,I) +#define _u26(I) _ck_u(26,I) +#define _u27(I) _ck_u(27,I) +#define _u28(I) _ck_u(28,I) +#define _u29(I) _ck_u(29,I) +#define _u30(I) _ck_u(30,I) +#define _u31(I) _ck_u(31,I) + +/* ========================================================================= */ +/* --- ASSEMBLER ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define _b00 0 +#define _b01 1 +#define _b10 2 +#define _b11 3 + +#define _b000 0 +#define _b001 1 +#define _b010 2 +#define _b011 3 +#define _b100 4 +#define _b101 5 +#define _b110 6 +#define _b111 7 + +#define _OFF4(D) (_UL(D) - _UL(x86_get_target())) +#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) + +#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D))) +#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D))) + +#ifndef _ASM_SAFETY +# define _M(M) (M) +# define _r(R) (R) +# define _m(M) (M) +# define _s(S) (S) +# define _i(I) (I) +# define _b(B) (B) +#else +# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M)) +# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R)) +# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M)) +# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S)) +# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I)) +# define _b(B) (((B)>7) ? 
x86_emit_failure0("internal error: memory base = " #B) : (B)) +#endif + +#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M)) +#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B)) + +#define _SCL(S) ((((S)==1) ? _b00 : \ + (((S)==2) ? _b01 : \ + (((S)==4) ? _b10 : \ + (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S)))))) + + +/* --- Memory subformats - urgh! ------------------------------------------- */ + +/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */ +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D))) +#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D))) +#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) +#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D))) + +#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) +#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) + +/* Use RIP-addressing in 64-bit mode, if possible */ +#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \ + ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff)) + +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \ + (_x86_RIP_addressing_possible(D, O) ? \ + _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \ + _r_DSIB(R,D))) : \ + (_rIP(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \ + _r_DB (R,D, B )))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \ + x86_emit_failure("illegal index register: %esp")))) + + +/* --- Instruction formats ------------------------------------------------- */ + +#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode")) +#define _m64only(X) ( X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode")) +#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) ) + +/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */ + +#define _d16() ( _B(0x66 ) ) +#define _O( OP ) ( _B( OP ) ) +#define _Or( OP,R ) ( _B( (OP)|_r(R)) ) +#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) ) +#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) ) +#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) +#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) +#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) ) +#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) +#define _sLO( L ) ( _s8P(L) ? 
1 : 4 ) +#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) ) +#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) ) +#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) ) +#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) ) +#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) +#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) +#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) +#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) ) +#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) ) +#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B)) +#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) ) +#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) ) +#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) ) +#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) ) +#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) ) +#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) ) +#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) ) +#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) ) +#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) ) +#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_B(B) ) +#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) ) +#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) ) +#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) ) +#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) ) +#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) ) +#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) ) +#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W)) +#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L)) +#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) +#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) ) +#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) ) + + +/* --- REX prefixes -------------------------------------------------------- */ + +#define _VOID() ((void)0) +#define _BIT(X) ((X) ? 1 : 0) +#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) + +#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? 
_d64(W,R,X,B) : _VOID()) +#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) +#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) +#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) +#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) + +// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH) +#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR)) +#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB)) +#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS) + +#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR)) +#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR)) +#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB)) +#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS) +#define _REXLr(RR) _m64(__REX_reg(RR)) +#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI)) + +#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR)) +#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB)) +#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS) +#define _REXQr(RR) _m64only(__REX_reg(RR)) +#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI)) + + +/* ========================================================================= */ +/* --- Fully-qualified intrinsic instructions ------------------------------ */ +/* ========================================================================= */ + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + * + 0 = top of stack register (for FPU instructions) + * + * NOTE in x86-64 mode: a memory operand with only a valid + * displacement value will lead to the expect absolute mode. If + * RIP addressing is necessary, X86_RIP shall be used as the base + * register argument. + */ + +/* --- ALU instructions ---------------------------------------------------- */ + +enum { + X86_ADD = 0, + X86_OR = 1, + X86_ADC = 2, + X86_SBB = 3, + X86_AND = 4, + X86_SUB = 5, + X86_XOR = 6, + X86_CMP = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) +#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS )) +#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS )) +#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \ + (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) +#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM))) + +#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) +#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) +#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) +#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? 
\ + (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \ + (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) +#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) + +#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) )) +#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS )) +#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS )) +#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \ + (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) ) +#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) )) +#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS )) +#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS )) +#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \ + (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) ) +#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) +#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD) +#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD) +#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD) +#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD) +#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD) +#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD) +#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD) +#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) +#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD) +#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD) +#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD) +#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD) +#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDLrm(RS, MD, MB, MI, 
MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD) +#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD) +#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD) +#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS) + +#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) +#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD) +#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD) +#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD) +#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD) +#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD) +#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD) +#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD) +#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS) + +#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) +#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD) +#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD) +#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD) +#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD) +#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD) +#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD) +#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD) +#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS) + +#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) +#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD) +#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS) +#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD) +#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS) + +#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD) +#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD) +#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS) +#define ORWir(IM, 
RD) _ALUWir(X86_OR, IM, RD) +#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS) + +#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD) +#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD) +#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS) +#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD) +#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS) + +#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD) +#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD) +#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS) +#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD) +#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS) + +#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) +#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD) +#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD) +#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD) +#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD) +#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD) +#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD) +#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD) +#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS) + +#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD) +#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD) +#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD) +#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD) +#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD) +#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD) +#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD) +#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD) +#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS) + +#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD) +#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD) +#define XORBim(IM, MD, MB, MI, MS) 
_ALUBim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD) +#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD) +#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD) +#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD) +#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD) +#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD) +#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS) + + +/* --- Shift/Rotate instructions ------------------------------------------- */ + +enum { + X86_ROL = 0, + X86_ROR = 1, + X86_RCL = 2, + X86_RCR = 3, + X86_SHL = 4, + X86_SHR = 5, + X86_SAR = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) ) +#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \ + (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) ) +#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \ + (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) ) +#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? 
\ + (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \ + (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) ) +#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD) +#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD) +#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD) +#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD) +#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD) +#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD) +#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD) +#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD) +#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS) + +#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD) +#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS) +#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD) +#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD) +#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS) +#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD) +#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD) +#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS) +#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD) +#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD) +#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS) +#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD) +#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD) +#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD) +#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD) +#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD) +#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD) +#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, 
MS) +#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD) +#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD) +#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD) +#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD) +#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD) +#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD) +#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD) +#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD) +#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD) +#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD) +#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD) +#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS) + +#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD) +#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD) +#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD) +#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD) +#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD) +#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD) +#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD) +#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD) +#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD) +#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD) +#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD) +#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD) +#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD) +#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD) +#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD) +#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD) +#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SALBir SHLBir +#define SALBim SHLBim +#define SALBrr 
SHLBrr +#define SALBrm SHLBrm + +#define SALWir SHLWir +#define SALWim SHLWim +#define SALWrr SHLWrr +#define SALWrm SHLWrm + +#define SALLir SHLLir +#define SALLim SHLLim +#define SALLrr SHLLrr +#define SALLrm SHLLrm + +#define SALQir SHLQir +#define SALQim SHLQim +#define SALQrr SHLQrr +#define SALQrm SHLQrm + +#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD) +#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS) +#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD) +#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD) +#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS) +#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD) +#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD) +#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS) +#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD) +#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD) +#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS) +#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD) +#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS) + + +/* --- Bit test instructions ----------------------------------------------- */ + +enum { + X86_BT = 4, + X86_BTS = 5, + X86_BTR = 6, + X86_BTC = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM))) +#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) )) +#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS )) + +#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM))) +#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) )) +#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS )) + +#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM))) +#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) )) +#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS )) + +#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD) +#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS) +#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD) +#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD) +#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS) +#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD) +#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD) +#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS) +#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD) +#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTCWir(IM, RD) 
_BTWir(X86_BTC, IM, RD) +#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD) +#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD) +#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD) +#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD) +#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD) +#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD) +#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD) +#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD) +#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD) +#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD) +#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD) +#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD) +#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD) +#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD) +#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD) +#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD) +#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD) +#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS) + + +/* --- Move instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
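+ *
+ * The ALU, shift/rotate and bit-test groups above all follow the
+ * OPCODE+size+operands naming scheme, so a few illustrative calls (a usage
+ * sketch added here for clarity; the AT&T forms shown are what the
+ * encodings correspond to, not output of this header) would be:
+ *
+ *	ADDLrr(X86_ESI, X86_EAX);                    addl  %esi,%eax
+ *	SUBLir(16, X86_ESP);                         subl  $16,%esp
+ *	ANDLim(0xff, 4, X86_EBP, X86_NOREG, 1);      andl  $0xff,4(%ebp)
+ *	SHLLir(3, X86_EDX);                          shll  $3,%edx
+ *	BTSLir(7, X86_ECX);                          btsl  $7,%ecx
+ *
+ * The move macros below take the same (MD,MB,MI,MS) memory quadruple.
+ *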
*/ + +#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) )) +#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )) +#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )) +#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM))) +#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))) + +#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )) +#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )) +#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM))) +#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))) + +#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )) +#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )) +#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM )) +#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + +#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) )) +#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS )) +#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM )) +#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + + +/* --- Unary and Multiply/Divide instructions ------------------------------ */ + +enum { + X86_NOT = 2, + X86_NEG = 3, + X86_MUL = 4, + X86_IMUL = 5, + X86_DIV = 6, + X86_IDIV = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
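+ *
+ * Illustrative uses of the move macros above (a sketch for clarity, not
+ * part of the original file; the AT&T forms are what the encodings
+ * correspond to):
+ *
+ *	MOVLir(0x11223344, X86_EAX);                 movl  $0x11223344,%eax
+ *	MOVLmr(8, X86_EBP, X86_NOREG, 1, X86_ECX);   movl  8(%ebp),%ecx
+ *	MOVLrm(X86_ECX, 0, X86_EDI, X86_ESI, 4);     movl  %ecx,(%edi,%esi,4)
+ *	MOVQir(0x1122334455667788ull, X86_RAX);      movabsq $...,%rax  (64-bit mode only)
+ *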
*/ + +#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) )) +#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS )) +#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) )) +#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) )) +#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) )) +#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) + +#define NOTBr(RS) _UNARYBr(X86_NOT, RS) +#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS) +#define NOTWr(RS) _UNARYWr(X86_NOT, RS) +#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS) +#define NOTLr(RS) _UNARYLr(X86_NOT, RS) +#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS) +#define NOTQr(RS) _UNARYQr(X86_NOT, RS) +#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS) + +#define NEGBr(RS) _UNARYBr(X86_NEG, RS) +#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS) +#define NEGWr(RS) _UNARYWr(X86_NEG, RS) +#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS) +#define NEGLr(RS) _UNARYLr(X86_NEG, RS) +#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS) +#define NEGQr(RS) _UNARYQr(X86_NEG, RS) +#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS) + +#define MULBr(RS) _UNARYBr(X86_MUL, RS) +#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS) +#define MULWr(RS) _UNARYWr(X86_MUL, RS) +#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS) +#define MULLr(RS) _UNARYLr(X86_MUL, RS) +#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS) +#define MULQr(RS) _UNARYQr(X86_MUL, RS) +#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS) + +#define IMULBr(RS) _UNARYBr(X86_IMUL, RS) +#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS) +#define IMULWr(RS) _UNARYWr(X86_IMUL, RS) +#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS) +#define IMULLr(RS) _UNARYLr(X86_IMUL, RS) +#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS) +#define IMULQr(RS) _UNARYQr(X86_IMUL, RS) +#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS) + +#define DIVBr(RS) _UNARYBr(X86_DIV, RS) +#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS) +#define DIVWr(RS) _UNARYWr(X86_DIV, RS) +#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS) +#define DIVLr(RS) _UNARYLr(X86_DIV, RS) +#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS) +#define DIVQr(RS) _UNARYQr(X86_DIV, RS) +#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS) + +#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS) +#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS) +#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS) +#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS) +#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS) +#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS) +#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS) +#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
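+ *
+ * The one-operand forms above use the implicit x86 register conventions.
+ * A sketch for clarity (AT&T forms shown for reference):
+ *
+ *	NEGLr(X86_ECX);                              negl  %ecx
+ *	MULLr(X86_EBX);                              mull  %ebx       edx:eax = eax * ebx
+ *	IDIVLm(8, X86_EBP, X86_NOREG, 1);            idivl 8(%ebp)    divides edx:eax
+ *
+ * The two- and three-operand IMUL forms below put the product in a register
+ * of the caller's choice instead of edx:eax.
+ *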
*/ + +#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) )) +#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )) + +#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) )) +#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )) + +#define IMULLir(IM, RD) (_REXLrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )) +#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )) +#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )) + +#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM )) +#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) )) +#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS )) + +#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM )) +#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM )) + +#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM )) +#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM )) + + +/* --- Control Flow related instructions ----------------------------------- */ + +enum { + X86_CC_O = 0x0, + X86_CC_NO = 0x1, + X86_CC_NAE = 0x2, + X86_CC_B = 0x2, + X86_CC_C = 0x2, + X86_CC_AE = 0x3, + X86_CC_NB = 0x3, + X86_CC_NC = 0x3, + X86_CC_E = 0x4, + X86_CC_Z = 0x4, + X86_CC_NE = 0x5, + X86_CC_NZ = 0x5, + X86_CC_BE = 0x6, + X86_CC_NA = 0x6, + X86_CC_A = 0x7, + X86_CC_NBE = 0x7, + X86_CC_S = 0x8, + X86_CC_NS = 0x9, + X86_CC_P = 0xa, + X86_CC_PE = 0xa, + X86_CC_NP = 0xb, + X86_CC_PO = 0xb, + X86_CC_L = 0xc, + X86_CC_NGE = 0xc, + X86_CC_GE = 0xd, + X86_CC_NL = 0xd, + X86_CC_LE = 0xe, + X86_CC_NG = 0xe, + X86_CC_G = 0xf, + X86_CC_NLE = 0xf, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +// FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode +#define CALLm(M) _O_D32 (0xe8 ,(int)(M) ) +#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) )) +#define _CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) +#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R)) +#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )) + +// FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode +#define JMPSm(M) _O_D8 (0xeb ,(int)(M) ) +#define JMPm(M) _O_D32 (0xe9 ,(int)(M) ) +#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) )) +#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) +#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R)) +#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
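+ *
+ * CALLm/JMPm take the absolute address of the target and emit a PC-relative
+ * rel32 computed from the current output position, so the target must lie
+ * within rel32 range of the buffer; the *sr/*sm forms are the register- and
+ * memory-indirect variants.  A sketch (func is a placeholder name):
+ *
+ *	CALLm((uintptr)func);                        call  func
+ *	CALLsr(X86_EAX);                             call  *%eax
+ *	JMPsm(0, X86_EBX, X86_NOREG, 1);             jmp   *(%ebx)
+ *
+ * The X86_CC_* values above plug into the Jcc/SETcc/CMOVcc macros that follow.
+ *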
*/ +#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) ) +#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) ) +#define JOSm(D) JCCSim(X86_CC_O, D) +#define JNOSm(D) JCCSim(X86_CC_NO, D) +#define JBSm(D) JCCSim(X86_CC_B, D) +#define JNAESm(D) JCCSim(X86_CC_NAE, D) +#define JNBSm(D) JCCSim(X86_CC_NB, D) +#define JAESm(D) JCCSim(X86_CC_AE, D) +#define JESm(D) JCCSim(X86_CC_E, D) +#define JZSm(D) JCCSim(X86_CC_Z, D) +#define JNESm(D) JCCSim(X86_CC_NE, D) +#define JNZSm(D) JCCSim(X86_CC_NZ, D) +#define JBESm(D) JCCSim(X86_CC_BE, D) +#define JNASm(D) JCCSim(X86_CC_NA, D) +#define JNBESm(D) JCCSim(X86_CC_NBE, D) +#define JASm(D) JCCSim(X86_CC_A, D) +#define JSSm(D) JCCSim(X86_CC_S, D) +#define JNSSm(D) JCCSim(X86_CC_NS, D) +#define JPSm(D) JCCSim(X86_CC_P, D) +#define JPESm(D) JCCSim(X86_CC_PE, D) +#define JNPSm(D) JCCSim(X86_CC_NP, D) +#define JPOSm(D) JCCSim(X86_CC_PO, D) +#define JLSm(D) JCCSim(X86_CC_L, D) +#define JNGESm(D) JCCSim(X86_CC_NGE, D) +#define JNLSm(D) JCCSim(X86_CC_NL, D) +#define JGESm(D) JCCSim(X86_CC_GE, D) +#define JLESm(D) JCCSim(X86_CC_LE, D) +#define JNGSm(D) JCCSim(X86_CC_NG, D) +#define JNLESm(D) JCCSim(X86_CC_NLE, D) +#define JGSm(D) JCCSim(X86_CC_G, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) ) +#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) ) +#define JOm(D) JCCim(X86_CC_O, D) +#define JNOm(D) JCCim(X86_CC_NO, D) +#define JBm(D) JCCim(X86_CC_B, D) +#define JNAEm(D) JCCim(X86_CC_NAE, D) +#define JNBm(D) JCCim(X86_CC_NB, D) +#define JAEm(D) JCCim(X86_CC_AE, D) +#define JEm(D) JCCim(X86_CC_E, D) +#define JZm(D) JCCim(X86_CC_Z, D) +#define JNEm(D) JCCim(X86_CC_NE, D) +#define JNZm(D) JCCim(X86_CC_NZ, D) +#define JBEm(D) JCCim(X86_CC_BE, D) +#define JNAm(D) JCCim(X86_CC_NA, D) +#define JNBEm(D) JCCim(X86_CC_NBE, D) +#define JAm(D) JCCim(X86_CC_A, D) +#define JSm(D) JCCim(X86_CC_S, D) +#define JNSm(D) JCCim(X86_CC_NS, D) +#define JPm(D) JCCim(X86_CC_P, D) +#define JPEm(D) JCCim(X86_CC_PE, D) +#define JNPm(D) JCCim(X86_CC_NP, D) +#define JPOm(D) JCCim(X86_CC_PO, D) +#define JLm(D) JCCim(X86_CC_L, D) +#define JNGEm(D) JCCim(X86_CC_NGE, D) +#define JNLm(D) JCCim(X86_CC_NL, D) +#define JGEm(D) JCCim(X86_CC_GE, D) +#define JLEm(D) JCCim(X86_CC_LE, D) +#define JNGm(D) JCCim(X86_CC_NG, D) +#define JNLEm(D) JCCim(X86_CC_NLE, D) +#define JGm(D) JCCim(X86_CC_G, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
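+ *
+ * The JccSm forms above emit the two-byte short branch (rel8), the Jccm
+ * forms the six-byte near branch (rel32); both take the absolute target
+ * address.  A typical backward loop, as a sketch:
+ *
+ *	unsigned char *loop = x86_get_target();
+ *	... loop body ...
+ *	SUBLir(1, X86_ECX);                          subl  $1,%ecx
+ *	JNZSm(loop);                                 jnz   loop   (rel8; range-checked under _ASM_SAFETY)
+ *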
*/ +#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )) +#define SETOr(RD) SETCCir(X86_CC_O, RD) +#define SETNOr(RD) SETCCir(X86_CC_NO, RD) +#define SETBr(RD) SETCCir(X86_CC_B, RD) +#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD) +#define SETNBr(RD) SETCCir(X86_CC_NB, RD) +#define SETAEr(RD) SETCCir(X86_CC_AE, RD) +#define SETEr(RD) SETCCir(X86_CC_E, RD) +#define SETZr(RD) SETCCir(X86_CC_Z, RD) +#define SETNEr(RD) SETCCir(X86_CC_NE, RD) +#define SETNZr(RD) SETCCir(X86_CC_NZ, RD) +#define SETBEr(RD) SETCCir(X86_CC_BE, RD) +#define SETNAr(RD) SETCCir(X86_CC_NA, RD) +#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD) +#define SETAr(RD) SETCCir(X86_CC_A, RD) +#define SETSr(RD) SETCCir(X86_CC_S, RD) +#define SETNSr(RD) SETCCir(X86_CC_NS, RD) +#define SETPr(RD) SETCCir(X86_CC_P, RD) +#define SETPEr(RD) SETCCir(X86_CC_PE, RD) +#define SETNPr(RD) SETCCir(X86_CC_NP, RD) +#define SETPOr(RD) SETCCir(X86_CC_PO, RD) +#define SETLr(RD) SETCCir(X86_CC_L, RD) +#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD) +#define SETNLr(RD) SETCCir(X86_CC_NL, RD) +#define SETGEr(RD) SETCCir(X86_CC_GE, RD) +#define SETLEr(RD) SETCCir(X86_CC_LE, RD) +#define SETNGr(RD) SETCCir(X86_CC_NG, RD) +#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD) +#define SETGr(RD) SETCCir(X86_CC_G, RD) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )) +#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S) +#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S) +#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S) +#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S) +#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S) +#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S) +#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S) +#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S) +#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S) +#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S) +#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S) +#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S) +#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S) +#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S) +#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S) +#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S) +#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S) +#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S) +#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S) +#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S) +#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S) +#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S) +#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S) +#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S) +#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S) +#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S) +#define SETNLEm(D, B, I, S) SETCCim(X86_CC_NLE, D, B, I, S) +#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/
+#define CMOVWrr(CC,RS,RD)               (_d16(), _REXLrr(RD, RS),     _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) ))
+#define CMOVWmr(CC,MD,MB,MI,MS,RD)      (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS ))
+#define CMOVLrr(CC,RS,RD)               (_REXLrr(RD, RS),             _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) ))
+#define CMOVLmr(CC,MD,MB,MI,MS,RD)      (_REXLmr(MB, MI, RD),         _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS ))
+#define CMOVQrr(CC,RS,RD)               (_REXQrr(RD, RS),             _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) ))
+#define CMOVQmr(CC,MD,MB,MI,MS,RD)      (_REXQmr(MB, MI, RD),         _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS ))
+
+
+/* --- Push/Pop instructions ----------------------------------------------- */
+
+/* _format  Opcd  ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define POPWr(RD)                       _m32only((_d16(),          _Or    (0x58,_r2(RD) )))
+#define POPWm(MD, MB, MI, MS)           _m32only((_d16(),          _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
+
+#define POPLr(RD)                       _m32only(                  _Or    (0x58,_r4(RD) ))
+#define POPLm(MD, MB, MI, MS)           _m32only(                  _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))
+
+#define POPQr(RD)                       _m64only((_REXQr(RD),      _Or    (0x58,_r8(RD) )))
+#define POPQm(MD, MB, MI, MS)           _m64only((_REXQm(MB, MI),  _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
+
+#define PUSHWr(RS)                      _m32only((_d16(),          _Or    (0x50,_r2(RS) )))
+#define PUSHWm(MD, MB, MI, MS)          _m32only((_d16(),          _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHWi(IM)                      _m32only((_d16(),          _Os_sW (0x68 ,IM )))
+
+#define PUSHLr(RS)                      _m32only(                  _Or    (0x50,_r4(RS) ))
+#define PUSHLm(MD, MB, MI, MS)          _m32only(                  _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))
+#define PUSHLi(IM)                      _m32only(                  _Os_sL (0x68 ,IM ))
+
+#define PUSHQr(RS)                      _m64only((_REXQr(RS),      _Or    (0x50,_r8(RS) )))
+#define PUSHQm(MD, MB, MI, MS)          _m64only((_REXQm(MB, MI),  _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHQi(IM)                      _m64only(                  _Os_sL (0x68 ,IM ))
+
+#define POPA()                          (_d16(), _O (0x61 ))
+#define POPAD()                         _O (0x61 )
+
+#define PUSHA()                         (_d16(), _O (0x60 ))
+#define PUSHAD()                        _O (0x60 )
+
+#define POPF()                          _O (0x9d )
+#define PUSHF()                         _O (0x9c )
+
+
+/* --- Test instructions --------------------------------------------------- */
+
+/* _format  Opcd  ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define TESTBrr(RS, RD)                 (_REXBrr(RS, RD),           _O_Mrm   (0x84 ,_b11,_r1(RS),_r1(RD) ))
+#define TESTBrm(RS, MD, MB, MI, MS)     (_REXBrm(RS, MB, MI),       _O_r_X   (0x84 ,_r1(RS) ,MD,MB,MI,MS ))
+#define TESTBir(IM, RD)                 (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
+                                        (_REXBrr(0, RD),            _O_B     (0xa8 ,_u8(IM))) : \
+                                        (_REXBrr(0, RD),            _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) )
+#define TESTBim(IM, MD, MB, MI, MS)     (_REXBrm(0, MB, MI),        _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)))
+
+#define TESTWrr(RS, RD)                 (_d16(), _REXLrr(RS, RD),   _O_Mrm   (0x85 ,_b11,_r2(RS),_r2(RD) ))
+#define TESTWrm(RS, MD, MB, MI, MS)     (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
+#define TESTWir(IM, RD)                 (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
+                                        (_d16(), _REXLrr(0, RD),    _O_W     (0xa9 ,_u16(IM))) : \
+                                        (_d16(), _REXLrr(0, RD),    _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
+#define TESTWim(IM, MD, MB, MI, MS)     (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
+
+#define TESTLrr(RS, RD)                 (_REXLrr(RS, RD),           _O_Mrm   (0x85 ,_b11,_r4(RS),_r4(RD) ))
+#define TESTLrm(RS, MD, MB, MI, MS)     (_REXLrm(RS, MB, MI),       _O_r_X   (0x85 ,_r4(RS) ,MD,MB,MI,MS ))
+#define TESTLir(IM, RD)                 (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ?
\ + (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) ) +#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + +#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) )) +#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS )) +#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \ + (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) ) +#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + + +/* --- Exchange instructions ----------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) )) +#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) )) +#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) )) +#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) )) +#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) )) +#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) )) +#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) )) +#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) )) +#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) )) +#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) )) +#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) )) +#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) )) +#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS )) + + +/* --- Increment/Decrement instructions ------------------------------------ */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS )) +#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) )) + +#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECWr(RD) (! X86_TARGET_64BIT ? 
(_d16(), _Or (0x48,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) ))) + +#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) ))) + +#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) )) + +#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )) +#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )) + +#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x40,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) ) + +#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) ))) + +#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) )) + + +/* --- Misc instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) )) +#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS )) +#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) )) +#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS )) + +#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) )) +#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS )) +#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) )) +#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS )) + +#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) )) +#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS )) +#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) )) +#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/
+
+#define MOVSBWrr(RS, RD)                (_d16(), _REXBLrr(RD, RS),   _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) ))
+#define MOVSBWmr(MD, MB, MI, MS, RD)    (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVZBWrr(RS, RD)                (_d16(), _REXBLrr(RD, RS),   _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) ))
+#define MOVZBWmr(MD, MB, MI, MS, RD)    (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ))
+
+#define MOVSBLrr(RS, RD)                (_REXBLrr(RD, RS),           _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) ))
+#define MOVSBLmr(MD, MB, MI, MS, RD)    (_REXLmr(MB, MI, RD),        _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZBLrr(RS, RD)                (_REXBLrr(RD, RS),           _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) ))
+#define MOVZBLmr(MD, MB, MI, MS, RD)    (_REXLmr(MB, MI, RD),        _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define MOVSBQrr(RS, RD)                (_REXQrr(RD, RS),            _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) ))
+#define MOVSBQmr(MD, MB, MI, MS, RD)    (_REXQmr(MB, MI, RD),        _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS ))
+#define MOVZBQrr(RS, RD)                (_REXQrr(RD, RS),            _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) ))
+#define MOVZBQmr(MD, MB, MI, MS, RD)    (_REXQmr(MB, MI, RD),        _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVSWLrr(RS, RD)                (_REXLrr(RD, RS),            _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) ))
+#define MOVSWLmr(MD, MB, MI, MS, RD)    (_REXLmr(MB, MI, RD),        _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZWLrr(RS, RD)                (_REXLrr(RD, RS),            _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) ))
+#define MOVZWLmr(MD, MB, MI, MS, RD)    (_REXLmr(MB, MI, RD),        _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define MOVSWQrr(RS, RD)                _m64only((_REXQrr(RD, RS),   _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )))
+#define MOVSWQmr(MD, MB, MI, MS, RD)    _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )))
+#define MOVZWQrr(RS, RD)                _m64only((_REXQrr(RD, RS),   _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )))
+#define MOVZWQmr(MD, MB, MI, MS, RD)    _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )))
+
+#define MOVSLQrr(RS, RD)                _m64only((_REXQrr(RD, RS),   _O_Mrm  (0x63 ,_b11,_r8(RD),_r4(RS) )))
+#define MOVSLQmr(MD, MB, MI, MS, RD)    _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS )))
+
+/* _format  Opcd  ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define LEALmr(MD, MB, MI, MS, RD)      (_REXLmr(MB, MI, RD),        _O_r_X  (0x8d ,_r4(RD) ,MD,MB,MI,MS ))
+#define LEAQmr(MD, MB, MI, MS, RD)      (_REXQmr(MB, MI, RD),        _O_r_X  (0x8d ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define BSWAPLr(R)                      (_REXLrr(0, R),              _OOr    (0x0fc8,_r4(R) ))
+#define BSWAPQr(R)                      (_REXQrr(0, R),              _OOr    (0x0fc8,_r8(R) ))
+
+#define CLC()                           _O (0xf8 )
+#define STC()                           _O (0xf9 )
+#define CMC()                           _O (0xf5 )
+
+#define CLD()                           _O (0xfc )
+#define STD()                           _O (0xfd )
+
+#define CBTW()                          (_d16(), _O (0x98 ))
+#define CWTL()                          _O (0x98 )
+#define CLTQ()                          _m64only((_REXQrr(0, 0), _O (0x98 )))
+
+#define CBW CBTW
+#define CWDE CWTL
+#define CDQE CLTQ
+
+#define CWTD()                          (_d16(), _O (0x99 ))
+#define CLTD()                          _O (0x99 )
+#define CQTO()                          _m64only((_REXQrr(0, 0), _O (0x99 )))
+
+#define CWD CWTD
+#define CDQ CLTD
+#define CQO CQTO
+
+#define LAHF()                          _O (0x9f )
+#define SAHF()                          _O (0x9e )
+
+/* _format  Opcd  ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define CPUID()                         _OO (0x0fa2 )
+#define RDTSC()                         _OO (0x0f31 )
+
+#define ENTERii(W, B)                   _O_W_B (0xc8 ,_su16(W),_su8(B))
+
+#define LEAVE()                         _O (0xc9 )
+#define RET()                           _O (0xc3 )
+#define RETi(IM)                        _O_W (0xc2 ,_su16(IM))
+
+#define NOP()                           _O (0x90 )
+
+
+/* --- Media 64-bit instructions ------------------------------------------- */
+
+enum {
+  X86_MMX_PABSB      = 0x1c,  // 2P
+  X86_MMX_PABSW      = 0x1d,  // 2P
+  X86_MMX_PABSD      = 0x1e,  // 2P
+  X86_MMX_PACKSSWB   = 0x63,
+  X86_MMX_PACKSSDW   = 0x6b,
+  X86_MMX_PACKUSWB   = 0x67,
+  X86_MMX_PADDB      = 0xfc,
+  X86_MMX_PADDW      = 0xfd,
+  X86_MMX_PADDD      = 0xfe,
+  X86_MMX_PADDQ      = 0xd4,
+  X86_MMX_PADDSB     = 0xec,
+  X86_MMX_PADDSW     = 0xed,
+  X86_MMX_PADDUSB    = 0xdc,
+  X86_MMX_PADDUSW    = 0xdd,
+  X86_MMX_PAND       = 0xdb,
+  X86_MMX_PANDN      = 0xdf,
+  X86_MMX_PAVGB      = 0xe0,
+  X86_MMX_PAVGW      = 0xe3,
+  X86_MMX_PCMPEQB    = 0x74,
+  X86_MMX_PCMPEQW    = 0x75,
+  X86_MMX_PCMPEQD    = 0x76,
+  X86_MMX_PCMPGTB    = 0x64,
+  X86_MMX_PCMPGTW    = 0x65,
+  X86_MMX_PCMPGTD    = 0x66,
+  X86_MMX_PEXTRW     = 0xc5,  // 64, /r ib
+  X86_MMX_PHADDW     = 0x01,  // 2P
+  X86_MMX_PHADDD     = 0x02,  // 2P
+  X86_MMX_PHADDSW    = 0x03,  // 2P
+  X86_MMX_PHSUBW     = 0x05,  // 2P
+  X86_MMX_PHSUBD     = 0x06,  // 2P
+  X86_MMX_PHSUBSW    = 0x07,  // 2P
+  X86_MMX_PINSRW     = 0xc4,  // 64, /r ib
+  X86_MMX_PMADDUBSW  = 0x04,  // 2P
+  X86_MMX_PMADDWD    = 0xf5,
+  X86_MMX_PMAXSW     = 0xee,
+  X86_MMX_PMAXUB     = 0xde,
+  X86_MMX_PMINSW     = 0xea,
+  X86_MMX_PMINUB     = 0xda,
+  X86_MMX_PMOVMSKB   = 0xd7,  // 64
+  X86_MMX_PMULHRSW   = 0x0b,  // 2P
+  X86_MMX_PMULHUW    = 0xe4,
+  X86_MMX_PMULHW     = 0xe5,
+  X86_MMX_PMULLW     = 0xd5,
+  X86_MMX_PMULUDQ    = 0xf4,
+  X86_MMX_POR        = 0xeb,
+  X86_MMX_PSADBW     = 0xf6,
+  X86_MMX_PSHUFB     = 0x00,  // 2P
+  X86_MMX_PSHUFW     = 0x70,  // /r ib
+  X86_MMX_PSIGNB     = 0x08,  // 2P
+  X86_MMX_PSIGNW     = 0x09,  // 2P
+  X86_MMX_PSIGND     = 0x0a,  // 2P
+  X86_MMX_PSLLW      = 0xf1,
+  X86_MMX_PSLLWi     = 0x71,  // /6 ib
+  X86_MMX_PSLLD      = 0xf2,
+  X86_MMX_PSLLDi     = 0x72,  // /6 ib
+  X86_MMX_PSLLQ      = 0xf3,
+  X86_MMX_PSLLQi     = 0x73,  // /6 ib
+  X86_MMX_PSRAW      = 0xe1,
+  X86_MMX_PSRAWi     = 0x71,  // /4 ib
+  X86_MMX_PSRAD      = 0xe2,
+  X86_MMX_PSRADi     = 0x72,  // /4 ib
+  X86_MMX_PSRLW      = 0xd1,
+  X86_MMX_PSRLWi     = 0x71,  // /2 ib
+  X86_MMX_PSRLD      = 0xd2,
+  X86_MMX_PSRLDi     = 0x72,  // /2 ib
+  X86_MMX_PSRLQ      = 0xd3,
+  X86_MMX_PSRLQi     = 0x73,  // /2 ib
+  X86_MMX_PSUBB      = 0xf8,
+  X86_MMX_PSUBW      = 0xf9,
+  X86_MMX_PSUBD      = 0xfa,
+  X86_MMX_PSUBQ      = 0xfb,
+  X86_MMX_PSUBSB     = 0xe8,
+  X86_MMX_PSUBSW     = 0xe9,
+  X86_MMX_PSUBUSB    = 0xd8,
+  X86_MMX_PSUBUSW    = 0xd9,
+  X86_MMX_PUNPCKHBW  = 0x68,
+  X86_MMX_PUNPCKHWD  = 0x69,
+  X86_MMX_PUNPCKHDQ  = 0x6a,
+  X86_MMX_PUNPCKLBW  = 0x60,
+  X86_MMX_PUNPCKLWD  = 0x61,
+  X86_MMX_PUNPCKLDQ  = 0x62,
+  X86_MMX_PXOR       = 0xef,
+};
+
+#define __MMXLrr(OP,RS,RSA,RD,RDA)              (_REXLrr(RD, RS),     _OO_Mrm   (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA)         (_REXLmr(MB, MI, RD), _OO_r_X   (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS)         (_REXLrm(RS, MB, MI), _OO_r_X   (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __MMXLirr(OP,IM,RS,RSA,RD,RDA)          (_REXLrr(RD, RS),     _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
+#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA)     (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
+#define __MMXQrr(OP,RS,RSA,RD,RDA)              (_REXQrr(RD, RS),     _OO_Mrm   (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA)         (_REXQmr(MB, MI, RD), _OO_r_X   (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS)         (_REXQrm(RS, MB, MI), _OO_r_X   (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __MMXQirr(OP,IM,RS,RSA,RD,RDA)          (_REXQrr(RD, RS),     _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
+#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA)     (_REXQmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
+#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA)          (_REXLrr(RD, RS),     _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA)     (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS)     (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+#define _MMXLrr(OP,RS,RD)               __MMXLrr(OP,RS,_rM,RD,_rM)
+#define _MMXLmr(OP,MD,MB,MI,MS,RD)      __MMXLmr(OP,MD,MB,MI,MS,RD,_rM)
+#define _MMXLrm(OP,RS,MD,MB,MI,MS)      __MMXLrm(OP,RS,_rM,MD,MB,MI,MS)
+#define _MMXQrr(OP,RS,RD)               __MMXQrr(OP,RS,_rM,RD,_rM)
+#define _MMXQmr(OP,MD,MB,MI,MS,RD)      __MMXQmr(OP,MD,MB,MI,MS,RD,_rM)
+#define _MMXQrm(OP,RS,MD,MB,MI,MS)      __MMXQrm(OP,RS,_rM,MD,MB,MI,MS)
+#define _2P_MMXLrr(OP,RS,RD)            __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM)
+#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD)   __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM)
+#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS)   __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS)
+
+#define MMX_MOVDMDrr(RS, RD)            __MMXLrr(0x6e, RS,_r4, RD,_rM)
+#define MMX_MOVQMDrr(RS, RD)            __MMXQrr(0x6e, RS,_r8, RD,_rM)
+#define MMX_MOVDMSrr(RS, RD)            __MMXLrr(0x7e, RD,_r4, RS,_rM)
+#define MMX_MOVQMSrr(RS, RD)            __MMXQrr(0x7e, RD,_r8, RS,_rM)
+
+#define MMX_MOVDmr(MD, MB, MI, MS, RD)  _MMXLmr(0x6e, MD, MB, MI, MS, RD)
+#define MMX_MOVDrm(RS, MD, MB, MI, MS)  _MMXLrm(0x7e, RS, MD, MB, MI, MS)
+#define MMX_MOVQrr(RS, RD)              _MMXLrr(0x6f, RS, RD)
+#define MMX_MOVQmr(MD, MB, MI, MS, RD)  _MMXLmr(0x6f, MD, MB, MI, MS, RD)
+#define MMX_MOVQrm(RS, MD, MB, MI, MS)  _MMXLrm(0x7f, RS, MD, MB, MI, MS)
+
+// Original MMX instructions
+#define MMX_PACKSSWBrr(RS, RD)          _MMXLrr(X86_MMX_PACKSSWB,RS,RD)
+#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD)  _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD)
+#define MMX_PACKSSDWrr(RS, RD)          _MMXLrr(X86_MMX_PACKSSDW,RS,RD)
+#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD)  _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD)
+#define MMX_PACKUSWBrr(RS, RD)          _MMXLrr(X86_MMX_PACKUSWB,RS,RD)
+#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD)  _MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD)
+#define MMX_PADDBrr(RS, RD)             _MMXLrr(X86_MMX_PADDB,RS,RD)
+#define MMX_PADDBmr(MD,MB,MI,MS,RD)     _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD)
+#define MMX_PADDWrr(RS, RD)             _MMXLrr(X86_MMX_PADDW,RS,RD)
+#define MMX_PADDWmr(MD,MB,MI,MS,RD)     _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD)
+#define MMX_PADDDrr(RS, RD)             _MMXLrr(X86_MMX_PADDD,RS,RD)
+#define MMX_PADDDmr(MD,MB,MI,MS,RD)     _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD)
+#define MMX_PADDQrr(RS, RD)             _MMXLrr(X86_MMX_PADDQ,RS,RD)
+#define MMX_PADDQmr(MD,MB,MI,MS,RD)     _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD)
+#define MMX_PADDSBrr(RS, RD)            _MMXLrr(X86_MMX_PADDSB,RS,RD)
+#define MMX_PADDSBmr(MD,MB,MI,MS,RD)    _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD)
+#define MMX_PADDSWrr(RS, RD)            _MMXLrr(X86_MMX_PADDSW,RS,RD)
+#define MMX_PADDSWmr(MD,MB,MI,MS,RD)    _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD)
+#define MMX_PADDUSBrr(RS, RD)           _MMXLrr(X86_MMX_PADDUSB,RS,RD)
+#define MMX_PADDUSBmr(MD,MB,MI,MS,RD)   _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD)
+#define MMX_PADDUSWrr(RS, RD)           _MMXLrr(X86_MMX_PADDUSW,RS,RD)
+#define MMX_PADDUSWmr(MD,MB,MI,MS,RD)   _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD)
+#define MMX_PANDrr(RS, RD)              _MMXLrr(X86_MMX_PAND,RS,RD)
+#define MMX_PANDmr(MD,MB,MI,MS,RD)      _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD)
+#define MMX_PANDNrr(RS, RD)             _MMXLrr(X86_MMX_PANDN,RS,RD)
+#define
MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD) +#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD) +#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD) +#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD) +#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD) +#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD) +#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD) +#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD) +#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, MB, MI, MS, RD) +#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD) +#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD) +#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD) +#define MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD) +#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD) +#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD) +#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD) +#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD) +#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD) +#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD) +#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD) +#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD) +#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD) +#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD) +#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD) +#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD) +#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD) +#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD) +#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD) +#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD) +#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD) +#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD) +#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD) +#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD) +#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD) +#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD) +#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD) +#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, MD, MB, MI, MS, RD) +#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD) +#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD) +#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, _b110,_rN) +#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD) +#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD) +#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN) +#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD) +#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD) +#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN) +#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD) +#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD) +#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN) +#define MMX_PSRAWrr(RS, RD) 
_MMXLrr(X86_MMX_PSRAW,RS,RD) +#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD) +#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN) +#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD) +#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD) +#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD) +#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD) +#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD) +#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD) +#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD) +#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD) +#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD) +#define MMX_PSUBBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD) +#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD) +#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD) +#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD) +#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD) +#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD) +#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD) +#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD) +#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD) +#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD) +#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD) +#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD) +#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD) +#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD) +#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD) +#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD) +#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD) +#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD) +#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD) +#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD) +#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD) +#define MMX_PXORrr(RS, RD) _MMXLrr(X86_MMX_PXOR,RS,RD) +#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD) + +#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM) +#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM) +#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4) +#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8) +#define MMX_PINSRWLirr(IM, RS, RD) 
__MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
+#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD)  __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r4)
+#define MMX_PINSRWQirr(IM, RS, RD)              __MMXQirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
+#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD)  __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r8)
+
+// Additional MMX instructions, introduced by the SSSE3 ISA
+#define MMX_PABSBrr(RS, RD)             _2P_MMXLrr(X86_MMX_PABSB,RS,RD)
+#define MMX_PABSBmr(MD,MB,MI,MS,RD)     _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD)
+#define MMX_PABSWrr(RS, RD)             _2P_MMXLrr(X86_MMX_PABSW,RS,RD)
+#define MMX_PABSWmr(MD,MB,MI,MS,RD)     _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD)
+#define MMX_PABSDrr(RS, RD)             _2P_MMXLrr(X86_MMX_PABSD,RS,RD)
+#define MMX_PABSDmr(MD,MB,MI,MS,RD)     _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD)
+#define MMX_PHADDWrr(RS, RD)            _2P_MMXLrr(X86_MMX_PHADDW,RS,RD)
+#define MMX_PHADDWmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD)
+#define MMX_PHADDDrr(RS, RD)            _2P_MMXLrr(X86_MMX_PHADDD,RS,RD)
+#define MMX_PHADDDmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD)
+#define MMX_PHADDSWrr(RS, RD)           _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD)
+#define MMX_PHADDSWmr(MD,MB,MI,MS,RD)   _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD)
+#define MMX_PHSUBWrr(RS, RD)            _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD)
+#define MMX_PHSUBWmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD)
+#define MMX_PHSUBDrr(RS, RD)            _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD)
+#define MMX_PHSUBDmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD)
+#define MMX_PHSUBSWrr(RS, RD)           _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD)
+#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD)   _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD)
+#define MMX_PMADDUBSWrr(RS, RD)         _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD)
+#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD)
+#define MMX_PMULHRSWrr(RS, RD)          _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD)
+#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD)  _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD)
+#define MMX_PSHUFBrr(RS, RD)            _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD)
+#define MMX_PSHUFBmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD)
+#define MMX_PSIGNBrr(RS, RD)            _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD)
+#define MMX_PSIGNBmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD)
+#define MMX_PSIGNWrr(RS, RD)            _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD)
+#define MMX_PSIGNWmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD)
+#define MMX_PSIGNDrr(RS, RD)            _2P_MMXLrr(X86_MMX_PSIGND,RS,RD)
+#define MMX_PSIGNDmr(MD,MB,MI,MS,RD)    _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD)
+
+#define EMMS()                          _OO (0x0f77 )
+
+
+/* --- Media 128-bit instructions ------------------------------------------ */
+
+enum {
+  X86_SSE_CC_EQ  = 0,
+  X86_SSE_CC_LT  = 1,
+  X86_SSE_CC_GT  = 1,
+  X86_SSE_CC_LE  = 2,
+  X86_SSE_CC_GE  = 2,
+  X86_SSE_CC_U   = 3,
+  X86_SSE_CC_NEQ = 4,
+  X86_SSE_CC_NLT = 5,
+  X86_SSE_CC_NGT = 5,
+  X86_SSE_CC_NLE = 6,
+  X86_SSE_CC_NGE = 6,
+  X86_SSE_CC_O   = 7
+};
+
+enum {
+  X86_SSE_UCOMI     = 0x2e,
+  X86_SSE_COMI      = 0x2f,
+  X86_SSE_CMP       = 0xc2,
+  X86_SSE_SQRT      = 0x51,
+  X86_SSE_RSQRT     = 0x52,
+  X86_SSE_RCP       = 0x53,
+  X86_SSE_AND       = 0x54,
+  X86_SSE_ANDN      = 0x55,
+  X86_SSE_OR        = 0x56,
+  X86_SSE_XOR       = 0x57,
+  X86_SSE_ADD       = 0x58,
+  X86_SSE_MUL       = 0x59,
+  X86_SSE_SUB       = 0x5c,
+  X86_SSE_MIN       = 0x5d,
+  X86_SSE_DIV       = 0x5e,
+  X86_SSE_MAX       = 0x5f,
+  X86_SSE_CVTDQ2PD  = 0xe6,
+  X86_SSE_CVTDQ2PS  = 0x5b,
+  X86_SSE_CVTPD2DQ  = 0xe6,
+  X86_SSE_CVTPD2PI  = 0x2d,
+  X86_SSE_CVTPD2PS  = 0x5a,
+  X86_SSE_CVTPI2PD  = 0x2a,
+
X86_SSE_CVTPI2PS = 0x2a, + X86_SSE_CVTPS2DQ = 0x5b, + X86_SSE_CVTPS2PD = 0x5a, + X86_SSE_CVTPS2PI = 0x2d, + X86_SSE_CVTSD2SI = 0x2d, + X86_SSE_CVTSD2SS = 0x5a, + X86_SSE_CVTSI2SD = 0x2a, + X86_SSE_CVTSI2SS = 0x2a, + X86_SSE_CVTSS2SD = 0x5a, + X86_SSE_CVTSS2SI = 0x2d, + X86_SSE_CVTTPD2PI = 0x2c, + X86_SSE_CVTTPD2DQ = 0xe6, + X86_SSE_CVTTPS2DQ = 0x5b, + X86_SSE_CVTTPS2PI = 0x2c, + X86_SSE_CVTTSD2SI = 0x2c, + X86_SSE_CVTTSS2SI = 0x2c, + X86_SSE_MOVMSK = 0x50, + X86_SSE_PACKSSDW = 0x6b, + X86_SSE_PACKSSWB = 0x63, + X86_SSE_PACKUSWB = 0x67, + X86_SSE_PADDB = 0xfc, + X86_SSE_PADDD = 0xfe, + X86_SSE_PADDQ = 0xd4, + X86_SSE_PADDSB = 0xec, + X86_SSE_PADDSW = 0xed, + X86_SSE_PADDUSB = 0xdc, + X86_SSE_PADDUSW = 0xdd, + X86_SSE_PADDW = 0xfd, + X86_SSE_PAND = 0xdb, + X86_SSE_PANDN = 0xdf, + X86_SSE_PAVGB = 0xe0, + X86_SSE_PAVGW = 0xe3, + X86_SSE_PCMPEQB = 0x74, + X86_SSE_PCMPEQD = 0x76, + X86_SSE_PCMPEQW = 0x75, + X86_SSE_PCMPGTB = 0x64, + X86_SSE_PCMPGTD = 0x66, + X86_SSE_PCMPGTW = 0x65, + X86_SSE_PMADDWD = 0xf5, + X86_SSE_PMAXSW = 0xee, + X86_SSE_PMAXUB = 0xde, + X86_SSE_PMINSW = 0xea, + X86_SSE_PMINUB = 0xda, + X86_SSE_PMOVMSKB = 0xd7, + X86_SSE_PMULHUW = 0xe4, + X86_SSE_PMULHW = 0xe5, + X86_SSE_PMULLW = 0xd5, + X86_SSE_PMULUDQ = 0xf4, + X86_SSE_POR = 0xeb, + X86_SSE_PSADBW = 0xf6, + X86_SSE_PSLLD = 0xf2, + X86_SSE_PSLLQ = 0xf3, + X86_SSE_PSLLW = 0xf1, + X86_SSE_PSRAD = 0xe2, + X86_SSE_PSRAW = 0xe1, + X86_SSE_PSRLD = 0xd2, + X86_SSE_PSRLQ = 0xd3, + X86_SSE_PSRLW = 0xd1, + X86_SSE_PSUBB = 0xf8, + X86_SSE_PSUBD = 0xfa, + X86_SSE_PSUBQ = 0xfb, + X86_SSE_PSUBSB = 0xe8, + X86_SSE_PSUBSW = 0xe9, + X86_SSE_PSUBUSB = 0xd8, + X86_SSE_PSUBUSW = 0xd9, + X86_SSE_PSUBW = 0xf9, + X86_SSE_PUNPCKHBW = 0x68, + X86_SSE_PUNPCKHDQ = 0x6a, + X86_SSE_PUNPCKHQDQ = 0x6d, + X86_SSE_PUNPCKHWD = 0x69, + X86_SSE_PUNPCKLBW = 0x60, + X86_SSE_PUNPCKLDQ = 0x62, + X86_SSE_PUNPCKLQDQ = 0x6c, + X86_SSE_PUNPCKLWD = 0x61, + X86_SSE_PXOR = 0xef, + X86_SSSE3_PSHUFB = 0x00, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD,RD), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) )) +#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS )) +#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) +#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) + +#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM))) +#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM))) +#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) +#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) + +#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) +#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD)) +#define _SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS)) +#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD)) +#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD)) + +#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) +#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX) +#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD) +#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD) + +#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX) +#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD) +#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD) + +#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX) +#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD) +#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, 
MB, MI, MS, RD) + +#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX) +#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD) +#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD) + +#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) +#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD) +#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD) +#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD) +#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) +#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) +#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD) +#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) + +#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) +#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD) +#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) +#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) + +#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD) +#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) +#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD) +#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) + +#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD) +#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) +#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD) +#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) + +#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) +#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) +#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD) +#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD) +#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD) +#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD) +#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD) +#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD) +#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD) +#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD) +#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD) +#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, 
MB, MI, MS, RD) +#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD) +#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD) +#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD) +#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD) +#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD) +#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) +#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD) +#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD) +#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD) + +#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) +#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) +#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD) +#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) + +#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) +#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) + +#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) +#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD) +#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD) +#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD) +#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD) +#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD) +#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD) +#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD) +#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) +#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD) +#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) +#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) + +#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) + +#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) + +#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD) +#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, 
MS, RD) +#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS) + +#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD) +#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD) +#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS) + +#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX) +#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX) +#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX) +#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX) +#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX) +#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX) +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX) +#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4) +#define CVTSS2SIQrr(RS, 
RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8) +#define CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM) +#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM) +#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX) +#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX) +#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM) +#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM) +#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4) +#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4) +#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8) +#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8) +#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4) +#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4) +#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8) +#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8) + +#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) +#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX) +#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX) +#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) + +#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) +#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) +#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM) +#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM) + +#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4) +#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS) +#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8) +#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) + +#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM) +#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4) +#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4) + +#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX) +#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX) + +#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX) +#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX) +#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX) +#define 
MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS) +#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX) +#define MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS) + +#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS) +#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS) + + +/* --- Floating-Point instructions ----------------------------------------- */ + +enum { + X86_F2XM1 = 0xd9f0, + X86_FABS = 0xd9e1, + X86_FADD = 0xd8c0, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIADD = 0xda00, // m32int, m16int + X86_FBLD = 0xdf04, // mem + X86_FBSTP = 0xdf06, // mem + X86_FCHS = 0xd9e0, + X86_FCMOVB = 0xdac0, // sti0 + X86_FCMOVE = 0xdac8, // sti0 + X86_FCMOVBE = 0xdad0, // sti0 + X86_FCMOVU = 0xdad8, // sti0 + X86_FCMOVNB = 0xdbc0, // sti0 + X86_FCMOVNE = 0xdbc8, // sti0 + X86_FCMOVNBE = 0xdbd0, // sti0 + X86_FCMOVNU = 0xdbd8, // sti0 + X86_FCOM = 0xd8d2, // m32fp, m64fp, sti + X86_FCOMP = 0xd8db, // m32fp, m64fp, sti + X86_FCOMPP = 0xded9, + X86_FCOMI = 0xdbf0, // sti0 + X86_FCOMIP = 0xdff0, // sti0 + X86_FUCOMI = 0xdbe8, // sti0 + X86_FUCOMIP = 0xdfe8, // sti0 + X86_FCOS = 0xd9ff, + X86_FDECSTP = 0xd9f6, + X86_FDIV = 0xd8f6, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIDIV = 0xda06, // m32int, m16int + X86_FDIVR = 0xd8ff, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIDIVR = 0xda07, // m32int, m16int + X86_FFREE = 0xddc0, // sti + X86_FICOM = 0xda02, // m32int, m16int + X86_FICOMP = 0xda03, // m32int, m16int + X86_FILD = 0xdb00, // m32int, m16int + X86_FILDQ = 0xdf05, // mem + X86_FINCSTP = 0xd9f7, + X86_FIST = 0xdb02, // m32int, m16int + X86_FISTP = 0xdb03, // m32int, m16int + X86_FISTPQ = 0xdf07, // mem + X86_FISTTP = 0xdb01, // m32int, m16int + X86_FISTTPQ = 0xdd01, // mem + X86_FLD = 0xd900, // m32fp, m64fp + X86_FLDT = 0xdb05, // mem + X86_FLD1 = 0xd9e8, + X86_FLDL2T = 0xd9e9, + X86_FLDL2E = 0xd9ea, + X86_FLDPI = 0xd9eb, + X86_FLDLG2 = 0xd9ec, + X86_FLDLN2 = 0xd9ed, + X86_FLDZ = 0xd9ee, + X86_FMUL = 0xd8c9, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIMUL = 0xda01, // m32int, m16int + X86_FNOP = 0xd9d0, + X86_FPATAN = 0xd9f3, + X86_FPREM = 0xd9f8, + X86_FPREM1 = 0xd9f5, + X86_FPTAN = 0xd9f2, + X86_FRNDINT = 0xd9fc, + X86_FSCALE = 0xd9fd, + X86_FSIN = 0xd9fe, + X86_FSINCOS = 0xd9fb, + X86_FSQRT = 0xd9fa, + X86_FSTS = 0xd902, // mem + X86_FSTD = 0xdd02, // mem + X86_FST = 0xddd0, // sti + X86_FSTPS = 0xd903, // mem + X86_FSTPD = 0xdd03, // mem + X86_FSTPT = 0xdb07, // mem + X86_FSTP = 0xddd8, // sti + X86_FSUB = 0xd8e4, // m32fp, m64fp, sti0, st0i, pst0i + X86_FISUB = 0xda04, // m32int, m16int + X86_FSUBR = 0xd8ed, // m32fp, m64fp, sti0, st0i, pst0i + X86_FISUBR = 0xda05, // m32int, m16int + X86_FTST = 0xd9e4, + X86_FUCOM = 0xdde0, // sti + X86_FUCOMP = 0xdde8, // sti + X86_FUCOMPP = 0xdae9, + X86_FXAM = 0xd9e5, + X86_FXCH = 0xd9c8, // sti + X86_FXTRACT = 0xd9f4, + X86_FYL2X = 0xd9f1, + X86_FYL2XP1 = 0xd9f9, +}; + +#define _FPU(OP) _OO(OP) +#define _FPUm(OP, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X((OP)>>8, (OP)&7, MD, MB, MI, MS)) +#define _FPUSm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) +#define _FPUDm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) +#define _FPULm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) +#define _FPUWm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) +#define _FPUr(OP, RR) 
_OOr((OP)&0xfff8, _rF(RR)) +#define _FPU0r(OP, RD) _FPUr((OP)|0x400, RD) +#define _FPUr0(OP, RS) _FPUr((OP) , RS) +#define _FPUrr(OP, RS, RD) (_rST0P(RS) ? _FPU0r(OP, RD) : (_rST0P(RD) ? _FPUr0(OP, RS) : x86_emit_failure("FPU instruction without st0"))) +#define _FPUP0r(OP, RD) _FPU0r((OP)|0x200, RD) + +#define F2XM1() _FPU(X86_F2XM1) +#define FABS() _FPU(X86_FABS) +#define FADDSm(MD, MB, MI, MS) _FPUSm(X86_FADD, MD, MB, MI, MS) +#define FADDDm(MD, MB, MI, MS) _FPUDm(X86_FADD, MD, MB, MI, MS) +#define FADDP0r(RD) _FPUP0r(X86_FADD, RD) +#define FADDrr(RS, RD) _FPUrr(X86_FADD, RS, RD) +#define FADD0r(RD) _FPU0r(X86_FADD, RD) +#define FADDr0(RS) _FPUr0(X86_FADD, RS) +#define FIADDWm(MD, MB, MI, MS) _FPUWm(X86_FIADD, MD, MB, MI, MS) +#define FIADDLm(MD, MB, MI, MS) _FPULm(X86_FIADD, MD, MB, MI, MS) +#define FBLDm(MD, MB, MI, MS) _FPUm(X86_FBLD, MD, MB, MI, MS) +#define FBSTPm(MD, MB, MI, MS) _FPUm(X86_FBSTP, MD, MB, MI, MS) +#define FCHS() _FPU(X86_FCHS) +#define FCMOVBr0(RS) _FPUr0(X86_FCMOVB, RS) +#define FCMOVEr0(RS) _FPUr0(X86_FCMOVE, RS) +#define FCMOVBEr0(RS) _FPUr0(X86_FCMOVBE, RS) +#define FCMOVUr0(RS) _FPUr0(X86_FCMOVU, RS) +#define FCMOVNBr0(RS) _FPUr0(X86_FCMOVNB, RS) +#define FCMOVNEr0(RS) _FPUr0(X86_FCMOVNE, RS) +#define FCMOVNBEr0(RS) _FPUr0(X86_FCMOVNBE, RS) +#define FCMOVNUr0(RS) _FPUr0(X86_FCMOVNU, RS) +#define FCOMSm(MD, MB, MI, MS) _FPUSm(X86_FCOM, MD, MB, MI, MS) +#define FCOMDm(MD, MB, MI, MS) _FPUDm(X86_FCOM, MD, MB, MI, MS) +#define FCOMr(RD) _FPUr(X86_FCOM, RD) +#define FCOMPSm(MD, MB, MI, MS) _FPUSm(X86_FCOMP, MD, MB, MI, MS) +#define FCOMPDm(MD, MB, MI, MS) _FPUDm(X86_FCOMP, MD, MB, MI, MS) +#define FCOMPr(RD) _FPUr(X86_FCOMP, RD) +#define FCOMPP() _FPU(X86_FCOMPP) +#define FCOMIr0(RS) _FPUr0(X86_FCOMI, RS) +#define FCOMIPr0(RS) _FPUr0(X86_FCOMIP, RS) +#define FUCOMIr0(RS) _FPUr0(X86_FUCOMI, RS) +#define FUCOMIPr0(RS) _FPUr0(X86_FUCOMIP, RS) +#define FCOS() _FPU(X86_FCOS) +#define FDECSTP() _FPU(X86_FDECSTP) +#define FDIVSm(MD, MB, MI, MS) _FPUSm(X86_FDIV, MD, MB, MI, MS) +#define FDIVDm(MD, MB, MI, MS) _FPUDm(X86_FDIV, MD, MB, MI, MS) +#define FDIVP0r(RD) _FPUP0r(X86_FDIV, RD) +#define FDIVrr(RS, RD) _FPUrr(X86_FDIV, RS, RD) +#define FDIV0r(RD) _FPU0r(X86_FDIV, RD) +#define FDIVr0(RS) _FPUr0(X86_FDIV, RS) +#define FIDIVWm(MD, MB, MI, MS) _FPUWm(X86_FIDIV, MD, MB, MI, MS) +#define FIDIVLm(MD, MB, MI, MS) _FPULm(X86_FIDIV, MD, MB, MI, MS) +#define FDIVRSm(MD, MB, MI, MS) _FPUSm(X86_FDIVR, MD, MB, MI, MS) +#define FDIVRDm(MD, MB, MI, MS) _FPUDm(X86_FDIVR, MD, MB, MI, MS) +#define FDIVRP0r(RD) _FPUP0r(X86_FDIVR, RD) +#define FDIVRrr(RS, RD) _FPUrr(X86_FDIVR, RS, RD) +#define FDIVR0r(RD) _FPU0r(X86_FDIVR, RD) +#define FDIVRr0(RS) _FPUr0(X86_FDIVR, RS) +#define FIDIVRWm(MD, MB, MI, MS) _FPUWm(X86_FIDIVR, MD, MB, MI, MS) +#define FIDIVRLm(MD, MB, MI, MS) _FPULm(X86_FIDIVR, MD, MB, MI, MS) +#define FFREEr(RD) _FPUr(X86_FFREE, RD) +#define FICOMWm(MD, MB, MI, MS) _FPUWm(X86_FICOM, MD, MB, MI, MS) +#define FICOMLm(MD, MB, MI, MS) _FPULm(X86_FICOM, MD, MB, MI, MS) +#define FICOMPWm(MD, MB, MI, MS) _FPUWm(X86_FICOMP, MD, MB, MI, MS) +#define FICOMPLm(MD, MB, MI, MS) _FPULm(X86_FICOMP, MD, MB, MI, MS) +#define FILDWm(MD, MB, MI, MS) _FPUWm(X86_FILD, MD, MB, MI, MS) +#define FILDLm(MD, MB, MI, MS) _FPULm(X86_FILD, MD, MB, MI, MS) +#define FILDQm(MD, MB, MI, MS) _FPUm(X86_FILDQ, MD, MB, MI, MS) +#define FINCSTP() _FPU(X86_FINCSTP) +#define FISTWm(MD, MB, MI, MS) _FPUWm(X86_FIST, MD, MB, MI, MS) +#define FISTLm(MD, MB, MI, MS) _FPULm(X86_FIST, MD, MB, MI, MS) +#define FISTPWm(MD, 
MB, MI, MS) _FPUWm(X86_FISTP, MD, MB, MI, MS) +#define FISTPLm(MD, MB, MI, MS) _FPULm(X86_FISTP, MD, MB, MI, MS) +#define FISTPQm(MD, MB, MI, MS) _FPUm(X86_FISTPQ, MD, MB, MI, MS) +#define FISTTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTTP, MD, MB, MI, MS) +#define FISTTPLm(MD, MB, MI, MS) _FPULm(X86_FISTTP, MD, MB, MI, MS) +#define FISTTPQm(MD, MB, MI, MS) _FPUm(X86_FISTTPQ, MD, MB, MI, MS) +#define FLDSm(MD, MB, MI, MS) _FPUSm(X86_FLD, MD, MB, MI, MS) +#define FLDDm(MD, MB, MI, MS) _FPUDm(X86_FLD, MD, MB, MI, MS) +#define FLDTm(MD, MB, MI, MS) _FPUm(X86_FLDT, MD, MB, MI, MS) +#define FLD1() _FPU(X86_FLD1) +#define FLDL2T() _FPU(X86_FLDL2T) +#define FLDL2E() _FPU(X86_FLDL2E) +#define FLDPI() _FPU(X86_FLDPI) +#define FLDLG2() _FPU(X86_FLDLG2) +#define FLDLN2() _FPU(X86_FLDLN2) +#define FLDZ() _FPU(X86_FLDZ) +#define FMULSm(MD, MB, MI, MS) _FPUSm(X86_FMUL, MD, MB, MI, MS) +#define FMULDm(MD, MB, MI, MS) _FPUDm(X86_FMUL, MD, MB, MI, MS) +#define FMULP0r(RD) _FPUP0r(X86_FMUL, RD) +#define FMULrr(RS, RD) _FPUrr(X86_FMUL, RS, RD) +#define FMUL0r(RD) _FPU0r(X86_FMUL, RD) +#define FMULr0(RS) _FPUr0(X86_FMUL, RS) +#define FIMULWm(MD, MB, MI, MS) _FPUWm(X86_FIMUL, MD, MB, MI, MS) +#define FIMULLm(MD, MB, MI, MS) _FPULm(X86_FIMUL, MD, MB, MI, MS) +#define FNOP() _FPU(X86_FNOP) +#define FPATAN() _FPU(X86_FPATAN) +#define FPREM() _FPU(X86_FPREM) +#define FPREM1() _FPU(X86_FPREM1) +#define FPTAN() _FPU(X86_FPTAN) +#define FRNDINT() _FPU(X86_FRNDINT) +#define FSCALE() _FPU(X86_FSCALE) +#define FSIN() _FPU(X86_FSIN) +#define FSINCOS() _FPU(X86_FSINCOS) +#define FSQRT() _FPU(X86_FSQRT) +#define FSTSm(MD, MB, MI, MS) _FPUm(X86_FSTS, MD, MB, MI, MS) +#define FSTDm(MD, MB, MI, MS) _FPUm(X86_FSTD, MD, MB, MI, MS) +#define FSTr(RD) _FPUr(X86_FST, RD) +#define FSTPSm(MD, MB, MI, MS) _FPUm(X86_FSTPS, MD, MB, MI, MS) +#define FSTPDm(MD, MB, MI, MS) _FPUm(X86_FSTPD, MD, MB, MI, MS) +#define FSTPTm(MD, MB, MI, MS) _FPUm(X86_FSTPT, MD, MB, MI, MS) +#define FSTPr(RD) _FPUr(X86_FSTP, RD) +#define FSUBSm(MD, MB, MI, MS) _FPUSm(X86_FSUB, MD, MB, MI, MS) +#define FSUBDm(MD, MB, MI, MS) _FPUDm(X86_FSUB, MD, MB, MI, MS) +#define FSUBP0r(RD) _FPUP0r(X86_FSUB, RD) +#define FSUBrr(RS, RD) _FPUrr(X86_FSUB, RS, RD) +#define FSUB0r(RD) _FPU0r(X86_FSUB, RD) +#define FSUBr0(RS) _FPUr0(X86_FSUB, RS) +#define FISUBWm(MD, MB, MI, MS) _FPUWm(X86_FISUB, MD, MB, MI, MS) +#define FISUBLm(MD, MB, MI, MS) _FPULm(X86_FISUB, MD, MB, MI, MS) +#define FSUBRSm(MD, MB, MI, MS) _FPUSm(X86_FSUBR, MD, MB, MI, MS) +#define FSUBRDm(MD, MB, MI, MS) _FPUDm(X86_FSUBR, MD, MB, MI, MS) +#define FSUBRP0r(RD) _FPUP0r(X86_FSUBR, RD) +#define FSUBRrr(RS, RD) _FPUrr(X86_FSUBR, RS, RD) +#define FSUBR0r(RD) _FPU0r(X86_FSUBR, RD) +#define FSUBRr0(RS) _FPUr0(X86_FSUBR, RS) +#define FISUBRWm(MD, MB, MI, MS) _FPUWm(X86_FISUBR, MD, MB, MI, MS) +#define FISUBRLm(MD, MB, MI, MS) _FPULm(X86_FISUBR, MD, MB, MI, MS) +#define FTST() _FPU(X86_FTST) +#define FUCOMr(RD) _FPUr(X86_FUCOM, RD) +#define FUCOMPr(RD) _FPUr(X86_FUCOMP, RD) +#define FUCOMPP() _FPU(X86_FUCOMPP) +#define FXAM() _FPU(X86_FXAM) +#define FXCHr(RD) _FPUr(X86_FXCH, RD) +#define FXTRACT() _FPU(X86_FXTRACT) +#define FYL2X() _FPU(X86_FYL2X) +#define FYL2XP1() _FPU(X86_FYL2XP1) + +#endif /* X86_RTASM_H */
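
For reference, a minimal usage sketch (illustrative only, not part of the copied file): the header emits code through a handful of hooks that the including JIT is expected to define before inclusion. The buffer cursor and the simplistic emitter definitions below are hypothetical stand-ins for whatever SheepShaver's translator already provides; they only show how the instruction macros above would be driven.

    #include <stdlib.h>

    static unsigned char *code_ptr;                  /* hypothetical output cursor */
    #define x86_get_target()     (code_ptr)
    #define x86_emit_byte(B)     (*code_ptr++ = (unsigned char)(B))
    #define x86_emit_word(W)     (x86_emit_byte((W) & 0xff), x86_emit_byte(((W) >> 8) & 0xff))
    #define x86_emit_long(L)     (x86_emit_word((L) & 0xffff), x86_emit_word(((L) >> 16) & 0xffff))
    #define x86_emit_failure(M)  (abort())
    #include "codegen_x86.h"

    /* emit "incl %eax; ret" at the current target */
    static void emit_inc_eax_ret(void)
    {
        INCLr(X86_EAX);   /* 0x40+r on IA-32; REX prefix + 0xff /0 when X86_TARGET_64BIT */
        RET();            /* 0xc3 */
    }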