Start adding support for AVX instructions with 256-bit %ymm registers. A couple of
notes:
- The instructions are added with dummy placeholder patterns using 256-bit
  specifiers. They are not meant to work yet, but since some of the existing
  multiclasses are generic enough to accept them, the groundwork will already be
  in place when we get to codegen.
- Add the VEX encoding bits needed to select the 256-bit vector length (VEX.L)
  for YMM registers; see the encoding sketch after these notes.
- Add MOVUPS and MOVAPS in the first round
- Use "Y" as suffix for those Instructions: MOVUPSYrr, ...
- All AVX instructions in X86InstrSSE.td will soon move to a new X86InstrAVX
  file.
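
For illustration only (this sketch is editorial; the 128-bit encoding line is an
assumption based on the existing VMOVAPS tests and is not part of this patch):
the new 256-bit forms differ from their 128-bit counterparts only in the VEX.L
bit, bit 2 of the second byte of the two-byte VEX prefix (inverted R, inverted
vvvv, L, pp), which the new loop in X86MCCodeEmitter sets whenever an operand is
a YMM register.

  vmovaps (%eax), %xmm2  // encoding: [0xc5,0xf8,0x28,0x10]  0xf8 -> VEX.L = 0, 128-bit
  vmovaps (%eax), %ymm2  // encoding: [0xc5,0xfc,0x28,0x10]  0xfc -> VEX.L = 1, 256-bit (matches the test added below)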



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107996 91177308-0d34-0410-b5e6-96231b3b80d8
Bruno Cardoso Lopes 2010-07-09 18:27:43 +00:00
parent e05442d508
commit e86b01c153
11 changed files with 199 additions and 18 deletions


@@ -68,6 +68,9 @@ public:
void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
};
}


@@ -80,6 +80,10 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "YMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
};
}


@@ -3147,6 +3147,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
return true;
}
return false;


@@ -227,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
//def f256mem : X86MemOperand<"printf256mem">;
def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.


@@ -106,6 +106,12 @@ def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// FIXME: move this to a more appropriate place after all AVX is done.
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// Like 'store', but always requires vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
@@ -130,6 +136,16 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr),
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// FIXME: move this to a more appropriate place after all AVX is done.
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
(v4f64 (alignedload node:$ptr))>;
def alignedloadv8i32 : PatFrag<(ops node:$ptr),
(v8i32 (alignedload node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
(v4i64 (alignedload node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
@@ -583,6 +599,15 @@ defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
"movaps", SSEPackedSingle>, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
"movapd", SSEPackedDouble>, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
}
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
@@ -606,6 +631,18 @@ def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",


@@ -75,7 +75,8 @@ public:
unsigned OpNum) {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
(SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
SrcRegNum += 8;
// The registers represented through VEX_VVVV should
@@ -454,6 +455,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
continue;
unsigned SrcReg = MI.getOperand(i).getReg();
if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
VEX_L = 1;
}
unsigned NumOps = MI.getNumOperands();
unsigned CurOp = 0;


@@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
case X86::XMM0: case X86::XMM8: case X86::MM0:
case X86::XMM0: case X86::XMM8:
case X86::YMM0: case X86::YMM8: case X86::MM0:
return 0;
case X86::XMM1: case X86::XMM9: case X86::MM1:
case X86::XMM1: case X86::XMM9:
case X86::YMM1: case X86::YMM9: case X86::MM1:
return 1;
case X86::XMM2: case X86::XMM10: case X86::MM2:
case X86::XMM2: case X86::XMM10:
case X86::YMM2: case X86::YMM10: case X86::MM2:
return 2;
case X86::XMM3: case X86::XMM11: case X86::MM3:
case X86::XMM3: case X86::XMM11:
case X86::YMM3: case X86::YMM11: case X86::MM3:
return 3;
case X86::XMM4: case X86::XMM12: case X86::MM4:
case X86::XMM4: case X86::XMM12:
case X86::YMM4: case X86::YMM12: case X86::MM4:
return 4;
case X86::XMM5: case X86::XMM13: case X86::MM5:
case X86::XMM5: case X86::XMM13:
case X86::YMM5: case X86::YMM13: case X86::MM5:
return 5;
case X86::XMM6: case X86::XMM14: case X86::MM6:
case X86::XMM6: case X86::XMM14:
case X86::YMM6: case X86::YMM14: case X86::MM6:
return 6;
case X86::XMM7: case X86::XMM15: case X86::MM7:
case X86::XMM7: case X86::XMM15:
case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
case X86::ES:


@@ -147,7 +147,7 @@ let Namespace = "X86" in {
def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
// Pseudo Floating Point registers
def FP0 : Register<"fp0">;
def FP1 : Register<"fp1">;
@@ -155,7 +155,7 @@ let Namespace = "X86" in {
def FP3 : Register<"fp3">;
def FP4 : Register<"fp4">;
def FP5 : Register<"fp5">;
def FP6 : Register<"fp6">;
def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
// The sub_ss and sub_sd subregs are the same registers with another regclass.
@@ -181,7 +181,8 @@ let Namespace = "X86" in {
}
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
// The sub_ss and sub_sd subregs are the same registers with another regclass.
let CompositeIndices = [(sub_ss), (sub_sd)], SubRegIndices = [sub_xmm] in {
def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
@@ -357,7 +358,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
}];
}
def GR32 : RegisterClass<"X86", [i32], 32,
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
@@ -412,7 +413,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
@@ -446,7 +447,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
}
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32,
def DEBUG_REG : RegisterClass<"X86", [i32], 32,
[DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
}
@@ -787,7 +788,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -803,11 +804,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}
}];
}
def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
VR256Class::iterator
VR256Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode.
else
return end();
}
}];
}
// Status flags registers.


@@ -12305,3 +12305,52 @@
// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f]
vcmptrue_usps %xmm1, %xmm2, %xmm3
// CHECK: vmovaps (%eax), %ymm2
// CHECK: encoding: [0xc5,0xfc,0x28,0x10]
vmovaps (%eax), %ymm2
// CHECK: vmovaps %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xfc,0x28,0xd1]
vmovaps %ymm1, %ymm2
// CHECK: vmovaps %ymm1, (%eax)
// CHECK: encoding: [0xc5,0xfc,0x29,0x08]
vmovaps %ymm1, (%eax)
// CHECK: vmovapd (%eax), %ymm2
// CHECK: encoding: [0xc5,0xfd,0x28,0x10]
vmovapd (%eax), %ymm2
// CHECK: vmovapd %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xfd,0x28,0xd1]
vmovapd %ymm1, %ymm2
// CHECK: vmovapd %ymm1, (%eax)
// CHECK: encoding: [0xc5,0xfd,0x29,0x08]
vmovapd %ymm1, (%eax)
// CHECK: vmovups (%eax), %ymm2
// CHECK: encoding: [0xc5,0xfc,0x10,0x10]
vmovups (%eax), %ymm2
// CHECK: vmovups %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xfc,0x10,0xd1]
vmovups %ymm1, %ymm2
// CHECK: vmovups %ymm1, (%eax)
// CHECK: encoding: [0xc5,0xfc,0x11,0x08]
vmovups %ymm1, (%eax)
// CHECK: vmovupd (%eax), %ymm2
// CHECK: encoding: [0xc5,0xfd,0x10,0x10]
vmovupd (%eax), %ymm2
// CHECK: vmovupd %ymm1, %ymm2
// CHECK: encoding: [0xc5,0xfd,0x10,0xd1]
vmovupd %ymm1, %ymm2
// CHECK: vmovupd %ymm1, (%eax)
// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
vmovupd %ymm1, (%eax)


@@ -2379,3 +2379,52 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
vcmptrue_usps %xmm11, %xmm12, %xmm13
// CHECK: vmovaps (%rax), %ymm12
// CHECK: encoding: [0xc5,0x7c,0x28,0x20]
vmovaps (%rax), %ymm12
// CHECK: vmovaps %ymm11, %ymm12
// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3]
vmovaps %ymm11, %ymm12
// CHECK: vmovaps %ymm11, (%rax)
// CHECK: encoding: [0xc5,0x7c,0x29,0x18]
vmovaps %ymm11, (%rax)
// CHECK: vmovapd (%rax), %ymm12
// CHECK: encoding: [0xc5,0x7d,0x28,0x20]
vmovapd (%rax), %ymm12
// CHECK: vmovapd %ymm11, %ymm12
// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3]
vmovapd %ymm11, %ymm12
// CHECK: vmovapd %ymm11, (%rax)
// CHECK: encoding: [0xc5,0x7d,0x29,0x18]
vmovapd %ymm11, (%rax)
// CHECK: vmovups (%rax), %ymm12
// CHECK: encoding: [0xc5,0x7c,0x10,0x20]
vmovups (%rax), %ymm12
// CHECK: vmovups %ymm11, %ymm12
// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3]
vmovups %ymm11, %ymm12
// CHECK: vmovups %ymm11, (%rax)
// CHECK: encoding: [0xc5,0x7c,0x11,0x18]
vmovups %ymm11, (%rax)
// CHECK: vmovupd (%rax), %ymm12
// CHECK: encoding: [0xc5,0x7d,0x10,0x20]
vmovupd (%rax), %ymm12
// CHECK: vmovupd %ymm11, %ymm12
// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3]
vmovupd %ymm11, %ymm12
// CHECK: vmovupd %ymm11, (%rax)
// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
vmovupd %ymm11, (%rax)


@@ -306,6 +306,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
REG("RFP64");
REG("RFP80");
REG("VR128");
REG("VR256");
REG("RST");
REG("SEGMENT_REG");
REG("DEBUG_REG");
@@ -339,6 +340,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
MEM("opaque80mem");
MEM("i128mem");
MEM("f128mem");
MEM("f256mem");
MEM("opaque512mem");
// all R, I, R, I