diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp
index 8aaf76b2bb2..5c4423057ad 100644
--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@@ -184,8 +184,8 @@ namespace {
     if (X86ScalarSSE) {
       // Set up the FP register classes.
-      addRegisterClass(MVT::f32, X86::RXMMRegisterClass);
-      addRegisterClass(MVT::f64, X86::RXMMRegisterClass);
+      addRegisterClass(MVT::f32, X86::V4F4RegisterClass);
+      addRegisterClass(MVT::f64, X86::V2F8RegisterClass);
 
       // SSE has no load+extend ops
       setOperationAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -4192,10 +4192,10 @@ void ISel::Select(SDOperand N) {
     case MVT::i8:  Opc = X86::MOV8rr; break;
     case MVT::i16: Opc = X86::MOV16rr; break;
     case MVT::i32: Opc = X86::MOV32rr; break;
-    case MVT::f32: Opc = X86::MOVAPSrr; break;
+    case MVT::f32: Opc = X86::MOVSSrr; break;
     case MVT::f64:
       if (X86ScalarSSE) {
-        Opc = X86::MOVAPDrr;
+        Opc = X86::MOVSDrr;
       } else {
         Opc = X86::FpMOV;
         ContainsFPCode = true;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2fec723b25b..0f717c3c2d8 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -28,7 +28,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                                unsigned& destReg) const {
   MachineOpCode oc = MI.getOpcode();
   if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
-      oc == X86::FpMOV || oc == X86::MOVAPDrr || oc == X86::MOVAPSrr) {
+      oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr) {
      assert(MI.getNumOperands() == 2 &&
             MI.getOperand(0).isRegister() &&
             MI.getOperand(1).isRegister() &&
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7d89ae09e48..5844aa794fb 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1408,142 +1408,134 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (ops R32:$dst, i16mem:$src),
 // XMM Floating point support (requires SSE2)
 //===----------------------------------------------------------------------===//
 
-def MOVSSrm : I<0x10, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
+def MOVSSrr : I<0x10, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
                 "movss {$src, $dst|$dst, $src}">, XS;
-def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, RXMM:$src),
+def MOVSSrm : I<0x10, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
                 "movss {$src, $dst|$dst, $src}">, XS;
-def MOVSDrm : I<0x10, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
+def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, V4F4:$src),
+                "movss {$src, $dst|$dst, $src}">, XS;
+def MOVSDrr : I<0x10, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
                 "movsd {$src, $dst|$dst, $src}">, XD;
-def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
+def MOVSDrm : I<0x10, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
+                "movsd {$src, $dst|$dst, $src}">, XD;
+def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, V2F8:$src),
                 "movsd {$src, $dst|$dst, $src}">, XD;
-def MOVAPSrr: I<0x28, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
-               "movaps {$src, $dst|$dst, $src}">, TB;
-def MOVAPSrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
-               "movaps {$src, $dst|$dst, $src}">, TB;
-def MOVAPSmr: I<0x29, MRMDestMem, (ops f32mem:$dst, RXMM:$src),
-               "movaps {$src, $dst|$dst, $src}">, TB;
-def MOVAPDrr: I<0x28, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
-               "movapd {$src, $dst|$dst, $src}">, TB, OpSize;
-def MOVAPDrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
-               "movapd {$src, $dst|$dst, $src}">, TB, OpSize;
-def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
-               "movapd {$src, $dst|$dst, $src}">, TB, OpSize;
-def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, V2F8:$src),
                 "cvttsd2si {$src, $dst|$dst, $src}">, XD;
 def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                 "cvttsd2si {$src, $dst|$dst, $src}">, XD;
-def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, V4F4:$src),
                 "cvttss2si {$src, $dst|$dst, $src}">, XS;
 def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                 "cvttss2si {$src, $dst|$dst, $src}">, XS;
-def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
+def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops V4F4:$dst, V2F8:$src),
                "cvtsd2ss {$src, $dst|$dst, $src}">, XS;
-def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
+def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops V4F4:$dst, f64mem:$src),
                "cvtsd2ss {$src, $dst|$dst, $src}">, XS;
-def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
+def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops V2F8:$dst, V4F4:$src),
                "cvtss2sd {$src, $dst|$dst, $src}">, XD;
-def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
+def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops V2F8:$dst, f32mem:$src),
                "cvtss2sd {$src, $dst|$dst, $src}">, XD;
-def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops V4F4:$dst, R32:$src),
                "cvtsi2ss {$src, $dst|$dst, $src}">, XS;
-def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops R32:$dst, f32mem:$src),
+def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops V4F4:$dst, i32mem:$src),
                "cvtsi2ss {$src, $dst|$dst, $src}">, XS;
-def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops V2F8:$dst, R32:$src),
                "cvtsi2sd {$src, $dst|$dst, $src}">, XD;
-def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops R32:$dst, f64mem:$src),
+def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops V2F8:$dst, i32mem:$src),
                "cvtsi2sd {$src, $dst|$dst, $src}">, XD;
-def SQRTSSrm : I<0x51, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
-               "subss {$src, $dst|$dst, $src}">, XS;
+def SQRTSSrm : I<0x51, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
+               "sqrtss {$src, $dst|$dst, $src}">, XS;
-def SQRTSSrr : I<0x51, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
-               "subss {$src, $dst|$dst, $src}">, XS;
+def SQRTSSrr : I<0x51, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
+               "sqrtss {$src, $dst|$dst, $src}">, XS;
-def SQRTSDrm : I<0x51, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
-               "subsd {$src, $dst|$dst, $src}">, XD;
+def SQRTSDrm : I<0x51, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
+               "sqrtsd {$src, $dst|$dst, $src}">, XD;
-def SQRTSDrr : I<0x51, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
-               "subsd {$src, $dst|$dst, $src}">, XD;
+def SQRTSDrr : I<0x51, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
+               "sqrtsd {$src, $dst|$dst, $src}">, XD;
-def UCOMISDrr: I<0x2E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
+def UCOMISDrr: I<0x2E, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
                "ucomisd {$src, $dst|$dst, $src}">, TB, OpSize;
-def UCOMISDrm: I<0x2E, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
+def UCOMISDrm: I<0x2E, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
                "ucomisd {$src, $dst|$dst, $src}">, TB, OpSize;
-def UCOMISSrr: I<0x2E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
+def UCOMISSrr: I<0x2E, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
                "ucomiss {$src, $dst|$dst, $src}">, TB;
-def UCOMISSrm: I<0x2E, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
+def UCOMISSrm: I<0x2E, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
                "ucomiss {$src, $dst|$dst, $src}">, TB;
 
 // Pseudo-instructions that map fld0 to xorps/xorpd for SSE.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def FLD0SS : I<0x57, MRMSrcReg, (ops RXMM:$dst),
+def FLD0SS : I<0x57, MRMSrcReg, (ops V4F4:$dst),
              "xorps $dst, $dst">, TB;
-def FLD0SD : I<0x57, MRMSrcReg, (ops RXMM:$dst),
+def FLD0SD : I<0x57, MRMSrcReg, (ops V2F8:$dst),
              "xorpd $dst, $dst">, TB, OpSize;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
-def ADDSSrr : I<0x58, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ADDSSrr : I<0x58, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "addss {$src, $dst|$dst, $src}">, XS;
-def ADDSDrr : I<0x58, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ADDSDrr : I<0x58, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "addsd {$src, $dst|$dst, $src}">, XD;
-def ANDPSrr : I<0x54, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "andps {$src, $dst|$dst, $src}">, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "andpd {$src, $dst|$dst, $src}">, TB, OpSize;
-def MULSSrr : I<0x59, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def MULSSrr : I<0x59, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "mulss {$src, $dst|$dst, $src}">, XS;
-def MULSDrr : I<0x59, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def MULSDrr : I<0x59, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "mulsd {$src, $dst|$dst, $src}">, XD;
-def ORPSrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ORPSrr : I<0x56, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
               "orps {$src, $dst|$dst, $src}">, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ORPDrr : I<0x56, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
               "orpd {$src, $dst|$dst, $src}">, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def XORPSrr : I<0x57, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "xorps {$src, $dst|$dst, $src}">, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def XORPDrr : I<0x57, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "xorpd {$src, $dst|$dst, $src}">, TB, OpSize;
 }
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                 "andnps {$src, $dst|$dst, $src}">, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                 "andnpd {$src, $dst|$dst, $src}">, TB, OpSize;
-def ADDSSrm : I<0x58, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
+def ADDSSrm : I<0x58, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
                "addss {$src, $dst|$dst, $src}">, XS;
-def ADDSDrm : I<0x58, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
+def ADDSDrm : I<0x58, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
                "addsd {$src, $dst|$dst, $src}">, XD;
-def MULSSrm : I<0x59, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
+def MULSSrm : I<0x59, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
                "mulss {$src, $dst|$dst, $src}">, XS;
-def MULSDrm : I<0x59, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
+def MULSDrm : I<0x59, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
                "mulsd {$src, $dst|$dst, $src}">, XD;
-def DIVSSrm : I<0x5E, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
+def DIVSSrm : I<0x5E, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
                "divss {$src, $dst|$dst, $src}">, XS;
-def DIVSSrr : I<0x5E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def DIVSSrr : I<0x5E, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "divss {$src, $dst|$dst, $src}">, XS;
-def DIVSDrm : I<0x5E, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
+def DIVSDrm : I<0x5E, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
                "divsd {$src, $dst|$dst, $src}">, XD;
-def DIVSDrr : I<0x5E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def DIVSDrr : I<0x5E, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "divsd {$src, $dst|$dst, $src}">, XD;
-def SUBSSrm : I<0x5C, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
+def SUBSSrm : I<0x5C, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
                "subss {$src, $dst|$dst, $src}">, XS;
-def SUBSSrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def SUBSSrr : I<0x5C, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
                "subss {$src, $dst|$dst, $src}">, XS;
-def SUBSDrm : I<0x5C, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
+def SUBSDrm : I<0x5C, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
                "subsd {$src, $dst|$dst, $src}">, XD;
-def SUBSDrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
+def SUBSDrr : I<0x5C, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
                "subsd {$src, $dst|$dst, $src}">, XD;
 def CMPSSrr : I<0xC2, MRMSrcReg,
-                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
+                (ops V4F4:$dst, V4F4:$src1, V4F4:$src, SSECC:$cc),
                 "cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
 def CMPSSrm : I<0xC2, MRMSrcMem,
-                (ops RXMM:$dst, RXMM:$src1, f32mem:$src, SSECC:$cc),
+                (ops V4F4:$dst, V4F4:$src1, f32mem:$src, SSECC:$cc),
                 "cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
 def CMPSDrr : I<0xC2, MRMSrcReg,
-                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
+                (ops V2F8:$dst, V2F8:$src1, V2F8:$src, SSECC:$cc),
                 "cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
 def CMPSDrm : I<0xC2, MRMSrcMem,
-                (ops RXMM:$dst, RXMM:$src1, f64mem:$src, SSECC:$cc),
+                (ops V2F8:$dst, V2F8:$src1, f64mem:$src, SSECC:$cc),
                 "cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
 }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 12761078964..a0e0aca16c2 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -57,7 +57,9 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     Opc = X86::MOV16mr;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FST64m;
-  } else if (RC == &X86::RXMMRegClass) {
+  } else if (RC == &X86::V4F4RegClass) {
+    Opc = X86::MOVSSmr;
+  } else if (RC == &X86::V2F8RegClass) {
     Opc = X86::MOVSDmr;
   } else {
     assert(0 && "Unknown regclass");
@@ -79,7 +81,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     Opc = X86::MOV16rm;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FLD64m;
-  } else if (RC == &X86::RXMMRegClass) {
+  } else if (RC == &X86::V4F4RegClass) {
+    Opc = X86::MOVSSrm;
+  } else if (RC == &X86::V2F8RegClass) {
     Opc = X86::MOVSDrm;
   } else {
     assert(0 && "Unknown regclass");
@@ -101,8 +105,10 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
     Opc = X86::MOV16rr;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FpMOV;
-  } else if (RC == &X86::RXMMRegClass) {
-    Opc = X86::MOVAPDrr;
+  } else if (RC == &X86::V4F4RegClass) {
+    Opc = X86::MOVSSrr;
+  } else if (RC == &X86::V2F8RegClass) {
+    Opc = X86::MOVSDrr;
   } else {
     assert(0 && "Unknown regclass");
     abort();
@@ -119,7 +125,7 @@ unsigned X86RegisterInfo::isLoadFromStackSlot(MachineInstr *MI,
   case X86::MOV32rm:
  case X86::FLD64m:
   case X86::FLD80m:
-  case X86::MOVAPDrm:
+  case X86::MOVSSrm:
   case X86::MOVSDrm:
     if (MI->getOperand(1).isFrameIndex() && MI->getOperand(2).isImmediate() &&
         MI->getOperand(3).isRegister() && MI->getOperand(4).isImmediate() &&
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 63bc7bd09d1..64571525cd4 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -104,10 +104,11 @@ def R32 : RegisterClass<"X86", i32, 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]
   }];
 }
 
-// FIXME: These registers can contain both integer and fp values.  We should
-// figure out the right way to deal with that.  For now, since they'll be used
-// for scalar FP, they are being declared f64
-def RXMM : RegisterClass<"X86", f64, 32,
+// V4F4, the 4 x f32 class, and V2F8, the 2 x f64 class, are used for
+// scalar SSE2 floating point support.
+def V4F4 : RegisterClass<"X86", f32, 32,
+           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
+def V2F8 : RegisterClass<"X86", f64, 64,
            [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
 
 // FIXME: This sets up the floating point register files as though they are f64
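
Note, not part of the patch: any later scalar-SSE instruction follows the same recipe as the defs above, taking V4F4 operands with the XS (F3) prefix for the f32 form and V2F8 operands with the XD (F2) prefix for the f64 form. As a sketch, here is how a MAXSS/MAXSD pair (real SSE instructions, opcode 0x5F, but hypothetical defs not present in this patch) would be declared against the new classes, modeled on ADDSSrr/ADDSDrr; the register class chosen also determines which spill, reload, and copy opcodes (MOVSS* vs. MOVSD*) X86RegisterInfo.cpp picks for the value.

let isTwoAddress = 1 in {
// Hypothetical defs for illustration only, following the ADDSSrr/ADDSDrr
// pattern above: $src1 is tied to $dst, $src is the explicit second operand.
def MAXSSrr : I<0x5F, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
               "maxss {$src, $dst|$dst, $src}">, XS;
def MAXSDrr : I<0x5F, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
               "maxsd {$src, $dst|$dst, $src}">, XD;
}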