diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td new file mode 100644 index 00000000000..a44da571e6a --- /dev/null +++ b/lib/Target/X86/X86InstrFPStack.td @@ -0,0 +1,482 @@ +//==- X86InstrFPStack.td - Describe the X86 Instruction Set -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 x87 FPU instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +// All FP Stack operations are represented with two instructions here. The +// first instruction, generated by the instruction selector, uses "RFP" +// registers: a traditional register file to reference floating point values. +// These instructions are all psuedo instructions and use the "Fp" prefix. +// The second instruction is defined with FPI, which is the actual instruction +// emitted by the assembler. The FP stackifier pass converts one to the other +// after register allocation occurs. +// +// Note that the FpI instruction should have instruction selection info (e.g. +// a pattern) and the FPI instruction should have emission info (e.g. opcode +// encoding and asm printing info). + +// FPI - Floating Point Instruction template. +class FPI o, Format F, dag ops, string asm> : I {} + +// FpI_ - Floating Point Psuedo Instruction template. Not Predicated. +class FpI_ pattern> + : X86Inst<0, Pseudo, NoImm, ops, ""> { + let FPForm = fp; let FPFormBits = FPForm.Value; + let Pattern = pattern; +} + +// Random Pseudo Instructions. +def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP, + [(set RFP:$dst, X86fpget)]>; // FPR = ST(0) + +let noResults = 1 in + def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP, + [(X86fpset RFP:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR + +// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack. +class FpI pattern> : + FpI_, Requires<[FPStack]>; + + +def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2 + +// Arithmetic +// Add, Sub, Mul, Div. +def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, + [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>; +def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, + [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>; +def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, + [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>; +def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, + [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>; + +class FPST0rInst o, string asm> + : FPI, D8; +class FPrST0Inst o, string asm> + : FPI, DC; +class FPrST0PInst o, string asm> + : FPI, DE; + +// Binary Ops with a memory source. +def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, + (extloadf64f32 addr:$src2)))]>; + // ST(0) = ST(0) + [mem32] +def FpADD64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>; + // ST(0) = ST(0) + [mem64] +def FpMUL32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fmul RFP:$src1, + (extloadf64f32 addr:$src2)))]>; + // ST(0) = ST(0) * [mem32] +def FpMUL64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>; + // ST(0) = ST(0) * [mem64] +def FpSUB32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub RFP:$src1, + (extloadf64f32 addr:$src2)))]>; + // ST(0) = ST(0) - [mem32] +def FpSUB64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>; + // ST(0) = ST(0) - [mem64] +def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2), + RFP:$src1))]>; + // ST(0) = [mem32] - ST(0) +def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>; + // ST(0) = [mem64] - ST(0) +def FpDIV32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv RFP:$src1, + (extloadf64f32 addr:$src2)))]>; + // ST(0) = ST(0) / [mem32] +def FpDIV64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>; + // ST(0) = ST(0) / [mem64] +def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2), + RFP:$src1))]>; + // ST(0) = [mem32] / ST(0) +def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>; + // ST(0) = [mem64] / ST(0) + + +def FADD32m : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">; +def FADD64m : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">; +def FMUL32m : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">; +def FMUL64m : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">; +def FSUB32m : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">; +def FSUB64m : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">; +def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">; +def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">; +def FDIV32m : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">; +def FDIV64m : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">; +def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">; +def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">; + +def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) + [mem16int] +def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) + [mem32int] +def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fmul RFP:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) * [mem16int] +def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fmul RFP:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) * [mem32int] +def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub RFP:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) - [mem16int] +def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub RFP:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) - [mem32int] +def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub (X86fild addr:$src2, i16), + RFP:$src1))]>; + // ST(0) = [mem16int] - ST(0) +def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fsub (X86fild addr:$src2, i32), + RFP:$src1))]>; + // ST(0) = [mem32int] - ST(0) +def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv RFP:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) / [mem16int] +def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv RFP:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) / [mem32int] +def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16), + RFP:$src1))]>; + // ST(0) = [mem16int] / ST(0) +def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32), + RFP:$src1))]>; + // ST(0) = [mem32int] / ST(0) + +def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">; +def FIADD32m : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">; +def FIMUL16m : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">; +def FIMUL32m : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">; +def FISUB16m : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">; +def FISUB32m : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">; +def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">; +def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">; +def FIDIV16m : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">; +def FIDIV32m : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">; +def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">; +def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">; + +// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion +// of some of the 'reverse' forms of the fsub and fdiv instructions. As such, +// we have to put some 'r's in and take them out of weird places. +def FADDST0r : FPST0rInst <0xC0, "fadd $op">; +def FADDrST0 : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">; +def FADDPrST0 : FPrST0PInst<0xC0, "faddp $op">; +def FSUBRST0r : FPST0rInst <0xE8, "fsubr $op">; +def FSUBrST0 : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">; +def FSUBPrST0 : FPrST0PInst<0xE8, "fsub{r}p $op">; +def FSUBST0r : FPST0rInst <0xE0, "fsub $op">; +def FSUBRrST0 : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">; +def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">; +def FMULST0r : FPST0rInst <0xC8, "fmul $op">; +def FMULrST0 : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">; +def FMULPrST0 : FPrST0PInst<0xC8, "fmulp $op">; +def FDIVRST0r : FPST0rInst <0xF8, "fdivr $op">; +def FDIVrST0 : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">; +def FDIVPrST0 : FPrST0PInst<0xF8, "fdiv{r}p $op">; +def FDIVST0r : FPST0rInst <0xF0, "fdiv $op">; +def FDIVRrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">; +def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">; + + +// Unary operations. +def FpCHS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, + [(set RFP:$dst, (fneg RFP:$src))]>; +def FpABS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, + [(set RFP:$dst, (fabs RFP:$src))]>; +def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, + [(set RFP:$dst, (fsqrt RFP:$src))]>; +def FpSIN : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, + [(set RFP:$dst, (fsin RFP:$src))]>; +def FpCOS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, + [(set RFP:$dst, (fcos RFP:$src))]>; +def FpTST : FpI<(ops RFP:$src), OneArgFP, + []>; + +def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9; +def FABS : FPI<0xE1, RawFrm, (ops), "fabs">, D9; +def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9; +def FSIN : FPI<0xFE, RawFrm, (ops), "fsin">, D9; +def FCOS : FPI<0xFF, RawFrm, (ops), "fcos">, D9; +def FTST : FPI<0xE4, RawFrm, (ops), "ftst">, D9; + + +// Floating point cmovs. +let isTwoAddress = 1 in { + def FpCMOVB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_B))]>; + def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_BE))]>; + def FpCMOVE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_E))]>; + def FpCMOVP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_P))]>; + def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_AE))]>; + def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_A))]>; + def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_NE))]>; + def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, + [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + X86_COND_NP))]>; +} + +def FCMOVB : FPI<0xC0, AddRegFrm, (ops RST:$op), + "fcmovb {$op, %st(0)|%ST(0), $op}">, DA; +def FCMOVBE : FPI<0xD0, AddRegFrm, (ops RST:$op), + "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA; +def FCMOVE : FPI<0xC8, AddRegFrm, (ops RST:$op), + "fcmove {$op, %st(0)|%ST(0), $op}">, DA; +def FCMOVP : FPI<0xD8, AddRegFrm, (ops RST:$op), + "fcmovu {$op, %st(0)|%ST(0), $op}">, DA; +def FCMOVNB : FPI<0xC0, AddRegFrm, (ops RST:$op), + "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB; +def FCMOVNBE : FPI<0xD0, AddRegFrm, (ops RST:$op), + "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB; +def FCMOVNE : FPI<0xC8, AddRegFrm, (ops RST:$op), + "fcmovne {$op, %st(0)|%ST(0), $op}">, DB; +def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op), + "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB; + +// Floating point loads & stores. +def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP, + [(set RFP:$dst, (extloadf64f32 addr:$src))]>; +def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, + [(set RFP:$dst, (loadf64 addr:$src))]>; +def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, + [(set RFP:$dst, (X86fild addr:$src, i16))]>; +def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP, + [(set RFP:$dst, (X86fild addr:$src, i32))]>; +def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP, + [(set RFP:$dst, (X86fild addr:$src, i64))]>; + +def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, + [(truncstore RFP:$src, addr:$op, f32)]>; +def FpST64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, + [(store RFP:$src, addr:$op)]>; + +def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>; +def FpSTP64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>; +def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>; +def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>; +def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>; + +def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">; +def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">; +def FILD16m : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">; +def FILD32m : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">; +def FILD64m : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">; +def FST32m : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">; +def FST64m : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">; +def FSTP32m : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">; +def FSTP64m : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">; +def FIST16m : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">; +def FIST32m : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">; +def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">; +def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">; +def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">; + +// FISTTP requires SSE3 even though it's a FPStack op. +def FpISTT16m : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP, + [(X86fp_to_i16mem RFP:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def FpISTT32m : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP, + [(X86fp_to_i32mem RFP:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def FpISTT64m : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP, + [(X86fp_to_i64mem RFP:$src, addr:$op)]>, + Requires<[HasSSE3]>; + +def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">; +def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">; +def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">; + +// FP Stack manipulation instructions. +def FLDrr : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9; +def FSTrr : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD; +def FSTPrr : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD; +def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; + +// Floating point constant loads. +def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, + [(set RFP:$dst, fp64imm0)]>; +def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, + [(set RFP:$dst, fp64imm1)]>; + +def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; +def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; + + +// Floating point compares. +def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, + []>; // FPSW = cmp ST(0) with ST(i) +def FpUCOMIr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, + [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i) + +def FUCOMr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) + (ops RST:$reg), + "fucom $reg">, DD, Imp<[ST0],[]>; +def FUCOMPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop + (ops RST:$reg), + "fucomp $reg">, DD, Imp<[ST0],[]>; +def FUCOMPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop + (ops), + "fucompp">, DA, Imp<[ST0],[]>; + +def FUCOMIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) + (ops RST:$reg), + "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>; +def FUCOMIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop + (ops RST:$reg), + "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>; + + +// Floating point flag ops. +def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags + (ops), "fnstsw", []>, DF, Imp<[],[AX]>; + +def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world + (ops i16mem:$dst), "fnstcw $dst", []>; +def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] + (ops i16mem:$dst), "fldcw $dst", []>; + + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map fld0 to pxor for sse. +// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst), + "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; +def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst), + "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>, + Requires<[HasSSE2]>, TB, OpSize; + +// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd. +// Upper bits are disregarded. +def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB; +def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE2]>, TB, OpSize; + +// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd. +// Upper bits are disregarded. +def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", + [(set FR32:$dst, (X86loadpf32 addr:$src))]>, + Requires<[HasSSE1]>, TB; +def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (X86loadpf64 addr:$src))]>, + Requires<[HasSSE2]>, TB, OpSize; + +// Alias bitwise logical operations using SSE logical ops on packed FP values. +let isTwoAddress = 1 in { +let isCommutable = 1 in { +def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>, + Requires<[HasSSE1]>, TB; +def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>, + Requires<[HasSSE1]>, TB; +def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +} +def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fand FR32:$src1, + (X86loadpf32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fand FR64:$src1, + (X86loadpf64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; +def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fxor FR32:$src1, + (X86loadpf32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fxor FR64:$src1, + (X86loadpf64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; + +def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +} diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 58f003f312a..414df5e351c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -191,6 +191,7 @@ def MRMInitReg : Format<32>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. +def HasMMX : Predicate<"Subtarget->hasMMX()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; @@ -2571,510 +2572,19 @@ def CMPSDrm : I<0xC2, MRMSrcMem, // Floating Point Stack Support //===----------------------------------------------------------------------===// -// Floating point support. All FP Stack operations are represented with two -// instructions here. The first instruction, generated by the instruction -// selector, uses "RFP" registers: a traditional register file to reference -// floating point values. These instructions are all psuedo instructions and -// use the "Fp" prefix. The second instruction is defined with FPI, which is -// the actual instruction emitted by the assembler. The FP stackifier pass -// converts one to the other after register allocation occurs. -// -// Note that the FpI instruction should have instruction selection info (e.g. -// a pattern) and the FPI instruction should have emission info (e.g. opcode -// encoding and asm printing info). - -// FPI - Floating Point Instruction template. -class FPI o, Format F, dag ops, string asm> : I {} - -// FpI_ - Floating Point Psuedo Instruction template. Not Predicated. -class FpI_ pattern> - : X86Inst<0, Pseudo, NoImm, ops, ""> { - let FPForm = fp; let FPFormBits = FPForm.Value; - let Pattern = pattern; -} - -// Random Pseudo Instructions. -def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP, - [(set RFP:$dst, X86fpget)]>; // FPR = ST(0) - -let noResults = 1 in - def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP, - [(X86fpset RFP:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR - -// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack. -class FpI pattern> : - FpI_, Requires<[FPStack]>; - - -def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2 - -// Arithmetic -// Add, Sub, Mul, Div. -def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>; -def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>; -def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>; -def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>; - -class FPST0rInst o, string asm> - : FPI, D8; -class FPrST0Inst o, string asm> - : FPI, DC; -class FPrST0PInst o, string asm> - : FPI, DE; - -// Binary Ops with a memory source. -def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) + [mem32] -def FpADD64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>; - // ST(0) = ST(0) + [mem64] -def FpMUL32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) * [mem32] -def FpMUL64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>; - // ST(0) = ST(0) * [mem64] -def FpSUB32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) - [mem32] -def FpSUB64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>; - // ST(0) = ST(0) - [mem64] -def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2), - RFP:$src1))]>; - // ST(0) = [mem32] - ST(0) -def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>; - // ST(0) = [mem64] - ST(0) -def FpDIV32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) / [mem32] -def FpDIV64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>; - // ST(0) = ST(0) / [mem64] -def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2), - RFP:$src1))]>; - // ST(0) = [mem32] / ST(0) -def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>; - // ST(0) = [mem64] / ST(0) - - -def FADD32m : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">; -def FADD64m : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">; -def FMUL32m : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">; -def FMUL64m : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">; -def FSUB32m : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">; -def FSUB64m : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">; -def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">; -def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">; -def FDIV32m : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">; -def FDIV64m : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">; -def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">; -def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">; - -def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (X86fild addr:$src2, i16)))]>; - // ST(0) = ST(0) + [mem16int] -def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) + [mem32int] -def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, - (X86fild addr:$src2, i16)))]>; - // ST(0) = ST(0) * [mem16int] -def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) * [mem32int] -def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, - (X86fild addr:$src2, i16)))]>; - // ST(0) = ST(0) - [mem16int] -def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) - [mem32int] -def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (X86fild addr:$src2, i16), - RFP:$src1))]>; - // ST(0) = [mem16int] - ST(0) -def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (X86fild addr:$src2, i32), - RFP:$src1))]>; - // ST(0) = [mem32int] - ST(0) -def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, - (X86fild addr:$src2, i16)))]>; - // ST(0) = ST(0) / [mem16int] -def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) / [mem32int] -def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16), - RFP:$src1))]>; - // ST(0) = [mem16int] / ST(0) -def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32), - RFP:$src1))]>; - // ST(0) = [mem32int] / ST(0) - -def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">; -def FIADD32m : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">; -def FIMUL16m : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">; -def FIMUL32m : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">; -def FISUB16m : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">; -def FISUB32m : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">; -def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">; -def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">; -def FIDIV16m : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">; -def FIDIV32m : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">; -def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">; -def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">; - -// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion -// of some of the 'reverse' forms of the fsub and fdiv instructions. As such, -// we have to put some 'r's in and take them out of weird places. -def FADDST0r : FPST0rInst <0xC0, "fadd $op">; -def FADDrST0 : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">; -def FADDPrST0 : FPrST0PInst<0xC0, "faddp $op">; -def FSUBRST0r : FPST0rInst <0xE8, "fsubr $op">; -def FSUBrST0 : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">; -def FSUBPrST0 : FPrST0PInst<0xE8, "fsub{r}p $op">; -def FSUBST0r : FPST0rInst <0xE0, "fsub $op">; -def FSUBRrST0 : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">; -def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">; -def FMULST0r : FPST0rInst <0xC8, "fmul $op">; -def FMULrST0 : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">; -def FMULPrST0 : FPrST0PInst<0xC8, "fmulp $op">; -def FDIVRST0r : FPST0rInst <0xF8, "fdivr $op">; -def FDIVrST0 : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">; -def FDIVPrST0 : FPrST0PInst<0xF8, "fdiv{r}p $op">; -def FDIVST0r : FPST0rInst <0xF0, "fdiv $op">; -def FDIVRrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">; -def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">; - - -// Unary operations. -def FpCHS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fneg RFP:$src))]>; -def FpABS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fabs RFP:$src))]>; -def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fsqrt RFP:$src))]>; -def FpSIN : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fsin RFP:$src))]>; -def FpCOS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fcos RFP:$src))]>; -def FpTST : FpI<(ops RFP:$src), OneArgFP, - []>; - -def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9; -def FABS : FPI<0xE1, RawFrm, (ops), "fabs">, D9; -def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9; -def FSIN : FPI<0xFE, RawFrm, (ops), "fsin">, D9; -def FCOS : FPI<0xFF, RawFrm, (ops), "fcos">, D9; -def FTST : FPI<0xE4, RawFrm, (ops), "ftst">, D9; - - -// Floating point cmovs. -let isTwoAddress = 1 in { - def FpCMOVB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_B))]>; - def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_BE))]>; - def FpCMOVE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_E))]>; - def FpCMOVP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_P))]>; - def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_AE))]>; - def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_A))]>; - def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_NE))]>; - def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, - X86_COND_NP))]>; -} - -def FCMOVB : FPI<0xC0, AddRegFrm, (ops RST:$op), - "fcmovb {$op, %st(0)|%ST(0), $op}">, DA; -def FCMOVBE : FPI<0xD0, AddRegFrm, (ops RST:$op), - "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA; -def FCMOVE : FPI<0xC8, AddRegFrm, (ops RST:$op), - "fcmove {$op, %st(0)|%ST(0), $op}">, DA; -def FCMOVP : FPI<0xD8, AddRegFrm, (ops RST:$op), - "fcmovu {$op, %st(0)|%ST(0), $op}">, DA; -def FCMOVNB : FPI<0xC0, AddRegFrm, (ops RST:$op), - "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB; -def FCMOVNBE : FPI<0xD0, AddRegFrm, (ops RST:$op), - "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB; -def FCMOVNE : FPI<0xC8, AddRegFrm, (ops RST:$op), - "fcmovne {$op, %st(0)|%ST(0), $op}">, DB; -def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op), - "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB; - -// Floating point loads & stores. -def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP, - [(set RFP:$dst, (extloadf64f32 addr:$src))]>; -def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, - [(set RFP:$dst, (loadf64 addr:$src))]>; -def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i16))]>; -def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i32))]>; -def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i64))]>; - -def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, - [(truncstore RFP:$src, addr:$op, f32)]>; -def FpST64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, - [(store RFP:$src, addr:$op)]>; - -def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>; -def FpSTP64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>; -def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>; -def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>; -def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>; - -def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">; -def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">; -def FILD16m : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">; -def FILD32m : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">; -def FILD64m : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">; -def FST32m : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">; -def FST64m : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">; -def FSTP32m : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">; -def FSTP64m : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">; -def FIST16m : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">; -def FIST32m : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">; -def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">; -def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">; -def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">; - -// FISTTP requires SSE3 even though it's a FPStack op. -def FpISTT16m : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i16mem RFP:$src, addr:$op)]>, - Requires<[HasSSE3]>; -def FpISTT32m : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i32mem RFP:$src, addr:$op)]>, - Requires<[HasSSE3]>; -def FpISTT64m : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i64mem RFP:$src, addr:$op)]>, - Requires<[HasSSE3]>; - -def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">; -def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">; -def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">; - -// FP Stack manipulation instructions. -def FLDrr : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9; -def FSTrr : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD; -def FSTPrr : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD; -def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; - -// Floating point constant loads. -def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, - [(set RFP:$dst, fp64imm0)]>; -def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, - [(set RFP:$dst, fp64imm1)]>; - -def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; -def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; - - -// Floating point compares. -def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) -def FpUCOMIr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, - [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i) - -def FUCOMr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) - (ops RST:$reg), - "fucom $reg">, DD, Imp<[ST0],[]>; -def FUCOMPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop - (ops RST:$reg), - "fucomp $reg">, DD, Imp<[ST0],[]>; -def FUCOMPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop - (ops), - "fucompp">, DA, Imp<[ST0],[]>; - -def FUCOMIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) - (ops RST:$reg), - "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>; -def FUCOMIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop - (ops RST:$reg), - "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>; - - -// Floating point flag ops. -def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags - (ops), "fnstsw", []>, DF, Imp<[],[AX]>; - -def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world - (ops i16mem:$dst), "fnstcw $dst", []>; -def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] - (ops i16mem:$dst), "fldcw $dst", []>; - +include "X86InstrFPStack.td" //===----------------------------------------------------------------------===// // MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2) //===----------------------------------------------------------------------===// -// Move Instructions -def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; - -def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; - - -def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; - -def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src), - "movq {$src, $dst|$dst, $src}", []>, XS, - Requires<[HasSSE2]>; -def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), - "movq {$src, $dst|$dst, $src}", []>, XS, - Requires<[HasSSE2]>; -def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; +include "X86InstrMMX.td" //===----------------------------------------------------------------------===// // XMM Packed Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// -def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}v2", []>, - Requires<[HasSSE2]>, TB, OpSize; - -def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE1]>, TB; -def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB, OpSize; -def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Logical -let isTwoAddress = 1 in { -let isCommutable = 1 in { -def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -} -def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fand V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fand V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fxor V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fxor V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; - -def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -} +include "X86InstrSSE.td" //===----------------------------------------------------------------------===// // Miscellaneous Instructions @@ -3100,102 +2610,6 @@ def MOV32r0 : I<0x31, MRMInitReg, (ops R32:$dst), "xor{l} $dst, $dst", [(set R32:$dst, 0)]>; -// Alias instructions that map fld0 to pxor for sse. -// FIXME: remove when we can teach regalloc that xor reg, reg is ok. -def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst), - "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; -def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst), - "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd. -// Upper bits are disregarded. -def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd. -// Upper bits are disregarded. -def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src), - "movaps {$src, $dst|$dst, $src}", - [(set FR32:$dst, (X86loadpf32 addr:$src))]>, - Requires<[HasSSE1]>, TB; -def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src), - "movapd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (X86loadpf64 addr:$src))]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. -let isTwoAddress = 1 in { -let isCommutable = 1 in { -def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, TB; -def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, TB; -def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -} -def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, - (X86loadpf32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, - (X86loadpf64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; -def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, - (X86loadpf32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, - (X86loadpf64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; - -def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -} - //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td new file mode 100644 index 00000000000..45f7f8a3c69 --- /dev/null +++ b/lib/Target/X86/X86InstrMMX.td @@ -0,0 +1,57 @@ +//====- X86InstrMMX.td - Describe the X86 Instruction Set -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 MMX instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +// Move Instructions +def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; +def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; +def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; + +def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; +def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; +def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; + + +def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src), + "movq {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; +def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src), + "movq {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; +def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src), + "movq {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; + +def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src), + "movq {$src, $dst|$dst, $src}", []>, XS, + Requires<[HasSSE2]>; +def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), + "movq {$src, $dst|$dst, $src}", []>, XS, + Requires<[HasSSE2]>; +def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), + "movq {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; + diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td new file mode 100644 index 00000000000..906ea8dca48 --- /dev/null +++ b/lib/Target/X86/X86InstrSSE.td @@ -0,0 +1,101 @@ +//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 SSE instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB; +def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE2]>, TB, OpSize; + +def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB; +def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE1]>, TB; +def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB, OpSize; +def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE2]>, TB, OpSize; + +// Logical +let isTwoAddress = 1 in { +let isCommutable = 1 in { +def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>, + Requires<[HasSSE1]>, TB; +def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>, + Requires<[HasSSE1]>, TB; +def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +} +def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fand V4F32:$src1, + (X86loadpv4f32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fand V2F64:$src1, + (X86loadpv2f64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; +def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fxor V4F32:$src1, + (X86loadpv4f32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fxor V2F64:$src1, + (X86loadpv2f64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; + +def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +}