Add new AVX vmaskmov instructions, and fix the VEX encoding bits needed to support them

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108983 91177308-0d34-0410-b5e6-96231b3b80d8
Bruno Cardoso Lopes 2010-07-21 02:46:58 +00:00
parent 3bdfbf5d56
commit 4b13f3cf3d
4 changed files with 109 additions and 11 deletions
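
The encodings asserted by the new tests further down are all 3-byte VEX sequences: 0xC4, then a byte holding the inverted R/X/B bits and the opcode map, then a byte holding W, the inverted vvvv register field, the vector-length bit L and the implied-prefix field pp, followed by the opcode and ModRM bytes. As a sanity check on the first 32-bit test vector, here is a minimal standalone C++ sketch written for this note (it is not part of the commit) that unpacks [0xc4,0xe2,0x51,0x2f,0x10], i.e. vmaskmovpd %xmm2, %xmm5, (%eax), into those fields:

// Standalone VEX-prefix decoder sketch (written for this note, not commit code).
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t enc[] = {0xc4, 0xe2, 0x51, 0x2f, 0x10}; // vmaskmovpd %xmm2, %xmm5, (%eax)

  // Byte after 0xC4: inverted R/X/B extension bits plus the opcode-map field.
  uint8_t b1 = enc[1];
  unsigned R = ~(b1 >> 7) & 1, X = ~(b1 >> 6) & 1, B = ~(b1 >> 5) & 1;
  unsigned map = b1 & 0x1f;          // 2 -> the 0F 38 opcode map

  // Next byte: W, inverted vvvv (the extra register operand), L, pp.
  uint8_t b2 = enc[2];
  unsigned W = (b2 >> 7) & 1;
  unsigned vvvv = ~(b2 >> 3) & 0xf;  // 5 -> %xmm5, the mask register
  unsigned L = (b2 >> 2) & 1;        // 0 -> 128-bit, 1 -> 256-bit
  unsigned pp = b2 & 3;              // 1 -> implied 0x66 prefix

  // enc[3] is the opcode (0x2f = vmaskmovpd, memory-destination form).
  uint8_t modrm = enc[4];
  unsigned mod = modrm >> 6;         // 0 -> memory operand, no displacement
  unsigned reg = (modrm >> 3) & 7;   // 2 -> %xmm2, the data register
  unsigned rm = modrm & 7;           // 0 -> base register %eax

  printf("R=%u X=%u B=%u map=%u W=%u vvvv=%u L=%u pp=%u mod=%u reg=%u rm=%u\n",
         R, X, B, map, W, vvvv, L, pp, mod, reg, rm);
  return 0;
}

It should print vvvv=5 (%xmm5, the mask, which is what VEX_4V has to carry), reg=2 (%xmm2, the data) and mod=0/rm=0 for the (%eax) memory operand — exactly the operand split the encoder changes below have to produce for the new memory-destination forms.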


@@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
 let isAsmParserOnly = 1 in {
 def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
-                  [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+                  [(store FR32:$src, addr:$dst)]>, XS, VEX;
 def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+                  [(store FR64:$src, addr:$dst)]>, XD, VEX;
 }
 // Extract and store.
@@ -5018,4 +5018,27 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
           "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
           []>, VEX;
+// Conditional SIMD Packed Loads and Stores
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr> {
+  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+                  (ins VR128:$src1, f128mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  []>, VEX_4V;
+  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+                  (ins VR256:$src1, f256mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  []>, VEX_4V;
+  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
+                  (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  []>, VEX_4V;
+  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+                  (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  []>, VEX_4V;
+}
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps">;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd">;
 } // isAsmParserOnly
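
In the new store (mr/Ymr) forms the memory operand is first in the ins list, $src1 is the mask register (it ends up in VEX.vvvv) and $src2 is the data register; the load (rm/Yrm) forms mirror that, with the mask in $src1 and the memory source in $src2. Semantically, the masked store writes only the lanes whose mask element has its sign bit set, and the masked load zeroes the other lanes. A scalar reference model of the 128-bit ps variant, written here purely as an illustration and not taken from this commit:

#include <cstdint>

// Store form: write src[i] to memory only when the sign bit of mask[i] is set;
// masked-off lanes leave memory untouched.
void maskmovps_store_ref(float mem[4], const uint32_t mask[4],
                         const float src[4]) {
  for (int i = 0; i < 4; ++i)
    if (mask[i] & 0x80000000u)
      mem[i] = src[i];
}

// Load form: masked-off lanes are zeroed in the destination register.
void maskmovps_load_ref(float dst[4], const uint32_t mask[4],
                        const float mem[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = (mask[i] & 0x80000000u) ? mem[i] : 0.0f;
}

The Yrm/Ymr variants do the same over eight float or four double lanes of a YMM register; the pd variants test the sign bit of 64-bit mask elements.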


@@ -469,30 +469,36 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   unsigned NumOps = MI.getNumOperands();
   unsigned CurOp = 0;
+  bool IsDestMem = false;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+  case X86II::MRMDestMem:
+    IsDestMem = true;
+    // The important info for the VEX prefix is never beyond the address
+    // registers. Don't check beyond that.
+    NumOps = CurOp = X86::AddrNumOperands;
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
   case X86II::MRM4m: case X86II::MRM5m:
   case X86II::MRM6m: case X86II::MRM7m:
-  case X86II::MRMDestMem:
-    NumOps = CurOp = X86::AddrNumOperands;
   case X86II::MRMSrcMem:
   case X86II::MRMSrcReg:
     if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
         X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_R = 0x0;
-    // CurOp and NumOps are equal when VEX_R represents a register used
-    // to index a memory destination (which is the last operand)
-    CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+    CurOp++;
     if (HasVEX_4V) {
-      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+      VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
       CurOp++;
     }
+    // To only check operands before the memory address ones, start
+    // the search from the begining
+    if (IsDestMem)
+      CurOp = 0;
     // If the last register should be encoded in the immediate field
     // do not use any bit from VEX prefix to this register, ignore it
     if (TSFlags & X86II::VEX_I8IMM)
@@ -833,10 +839,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   case X86II::MRMDestMem:
     EmitByte(BaseOpcode, CurByte, OS);
+    SrcRegNum = CurOp + X86::AddrNumOperands;
+    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+      SrcRegNum++;
     EmitMemModRMByte(MI, CurOp,
-                     GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
+                     GetX86RegNum(MI.getOperand(SrcRegNum)),
                      TSFlags, CurByte, OS, Fixups);
-    CurOp += X86::AddrNumOperands + 1;
+    CurOp = SrcRegNum + 1;
     break;
   case X86II::MRMSrcReg:
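
The two hunks above have to agree on where the operands live for the new memory-destination VEX_4V forms: the memory-address operands come first, $src1 (the mask) follows and is encoded in VEX.vvvv by EmitVEXOpcodePrefix, and $src2 (the data) comes last and lands in ModRM.reg via EmitMemModRMByte. A small worked example of the index arithmetic, assuming X86::AddrNumOperands is 5 (base, scale, index, displacement, segment) and written only for this note:

#include <cstdio>

int main() {
  const unsigned AddrNumOperands = 5;  // stands in for X86::AddrNumOperands
  const bool HasVEX_4V = true;         // the new mr/Ymr forms carry VEX_4V

  // Operand list for VMASKMOVPSmr:
  //   [base, scale, index, disp, segment, $src1 (mask), $src2 (data)]
  unsigned CurOp = 0;                            // memory operand starts the list
  unsigned SrcRegNum = CurOp + AddrNumOperands;  // operand 5: $src1 -> VEX.vvvv
  if (HasVEX_4V)
    ++SrcRegNum;                                 // operand 6: $src2 -> ModRM.reg

  printf("VEX.vvvv <- operand %u, ModRM.reg <- operand %u\n",
         CurOp + AddrNumOperands, SrcRegNum);
  return 0;
}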


@@ -13030,3 +13030,35 @@
 // CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07]
 vextractf128 $7, %ymm2, (%eax)
+// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10]
+vmaskmovpd %xmm2, %xmm5, (%eax)
+// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10]
+vmaskmovpd %ymm2, %ymm5, (%eax)
+// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28]
+vmaskmovpd (%eax), %xmm2, %xmm5
+// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28]
+vmaskmovpd (%eax), %ymm2, %ymm5
+// CHECK: vmaskmovps %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10]
+vmaskmovps %xmm2, %xmm5, (%eax)
+// CHECK: vmaskmovps %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10]
+vmaskmovps %ymm2, %ymm5, (%eax)
+// CHECK: vmaskmovps (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28]
+vmaskmovps (%eax), %xmm2, %xmm5
+// CHECK: vmaskmovps (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28]
+vmaskmovps (%eax), %ymm2, %ymm5


@@ -3104,3 +3104,35 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07]
 vextractf128 $7, %ymm12, (%rax)
+// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20]
+vmaskmovpd %xmm12, %xmm10, (%rax)
+// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20]
+vmaskmovpd %ymm12, %ymm10, (%rax)
+// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10]
+vmaskmovpd (%rax), %xmm12, %xmm10
+// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10]
+vmaskmovpd (%rax), %ymm12, %ymm10
+// CHECK: vmaskmovps %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20]
+vmaskmovps %xmm12, %xmm10, (%rax)
+// CHECK: vmaskmovps %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20]
+vmaskmovps %ymm12, %ymm10, (%rax)
+// CHECK: vmaskmovps (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10]
+vmaskmovps (%rax), %xmm12, %xmm10
+// CHECK: vmaskmovps (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10]
+vmaskmovps (%rax), %ymm12, %ymm10