Properly split f32 and f64 into separate register classes for scalar sse fp

fixing a bunch of nasty hackery


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23735 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nate Begeman 2005-10-14 22:06:00 +00:00
parent e0de44adba
commit 14e2cf62f4
5 changed files with 77 additions and 78 deletions

View File

@ -184,8 +184,8 @@ namespace {
if (X86ScalarSSE) {
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::RXMMRegisterClass);
addRegisterClass(MVT::f64, X86::RXMMRegisterClass);
addRegisterClass(MVT::f32, X86::V4F4RegisterClass);
addRegisterClass(MVT::f64, X86::V2F8RegisterClass);
// SSE has no load+extend ops
setOperationAction(ISD::EXTLOAD, MVT::f32, Expand);
@ -4192,10 +4192,10 @@ void ISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::MOV8rr; break;
case MVT::i16: Opc = X86::MOV16rr; break;
case MVT::i32: Opc = X86::MOV32rr; break;
case MVT::f32: Opc = X86::MOVAPSrr; break;
case MVT::f32: Opc = X86::MOVSSrr; break;
case MVT::f64:
if (X86ScalarSSE) {
Opc = X86::MOVAPDrr;
Opc = X86::MOVSDrr;
} else {
Opc = X86::FpMOV;
ContainsFPCode = true;

View File

@ -28,7 +28,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& destReg) const {
MachineOpCode oc = MI.getOpcode();
if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
oc == X86::FpMOV || oc == X86::MOVAPDrr || oc == X86::MOVAPSrr) {
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr) {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isRegister() &&
MI.getOperand(1).isRegister() &&

View File

@ -1408,142 +1408,134 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (ops R32:$dst, i16mem:$src),
// XMM Floating point support (requires SSE2)
//===----------------------------------------------------------------------===//
def MOVSSrm : I<0x10, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
def MOVSSrr : I<0x10, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
"movss {$src, $dst|$dst, $src}">, XS;
def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, RXMM:$src),
def MOVSSrm : I<0x10, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
"movss {$src, $dst|$dst, $src}">, XS;
def MOVSDrm : I<0x10, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, V4F4:$src),
"movss {$src, $dst|$dst, $src}">, XS;
def MOVSDrr : I<0x10, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
"movsd {$src, $dst|$dst, $src}">, XD;
def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
def MOVSDrm : I<0x10, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
"movsd {$src, $dst|$dst, $src}">, XD;
def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, V2F8:$src),
"movsd {$src, $dst|$dst, $src}">, XD;
def MOVAPSrr: I<0x28, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
"movaps {$src, $dst|$dst, $src}">, TB;
def MOVAPSrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
"movaps {$src, $dst|$dst, $src}">, TB;
def MOVAPSmr: I<0x29, MRMDestMem, (ops f32mem:$dst, RXMM:$src),
"movaps {$src, $dst|$dst, $src}">, TB;
def MOVAPDrr: I<0x28, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
"movapd {$src, $dst|$dst, $src}">, TB, OpSize;
def MOVAPDrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
"movapd {$src, $dst|$dst, $src}">, TB, OpSize;
def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
"movapd {$src, $dst|$dst, $src}">, TB, OpSize;
def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, V2F8:$src),
"cvttsd2si {$src, $dst|$dst, $src}">, XD;
def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
"cvttsd2si {$src, $dst|$dst, $src}">, XD;
def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, V4F4:$src),
"cvttss2si {$src, $dst|$dst, $src}">, XS;
def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
"cvttss2si {$src, $dst|$dst, $src}">, XS;
def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops V4F4:$dst, V2F8:$src),
"cvtsd2ss {$src, $dst|$dst, $src}">, XS;
def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops V4F4:$dst, f64mem:$src),
"cvtsd2ss {$src, $dst|$dst, $src}">, XS;
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops V2F8:$dst, V4F4:$src),
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops V2F8:$dst, f32mem:$src),
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops V4F4:$dst, R32:$src),
"cvtsi2ss {$src, $dst|$dst, $src}">, XS;
def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops R32:$dst, f32mem:$src),
def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops V4F4:$dst, i32mem:$src),
"cvtsi2ss {$src, $dst|$dst, $src}">, XS;
def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops V2F8:$dst, R32:$src),
"cvtsi2sd {$src, $dst|$dst, $src}">, XD;
def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops R32:$dst, f64mem:$src),
def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops V2F8:$dst, i32mem:$src),
"cvtsi2sd {$src, $dst|$dst, $src}">, XD;
def SQRTSSrm : I<0x51, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
def SQRTSSrm : I<0x51, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
"subss {$src, $dst|$dst, $src}">, XS;
def SQRTSSrr : I<0x51, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def SQRTSSrr : I<0x51, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
"subss {$src, $dst|$dst, $src}">, XS;
def SQRTSDrm : I<0x51, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
def SQRTSDrm : I<0x51, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
"subsd {$src, $dst|$dst, $src}">, XD;
def SQRTSDrr : I<0x51, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def SQRTSDrr : I<0x51, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
"subsd {$src, $dst|$dst, $src}">, XD;
def UCOMISDrr: I<0x2E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def UCOMISDrr: I<0x2E, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
"ucomisd {$src, $dst|$dst, $src}">, TB, OpSize;
def UCOMISDrm: I<0x2E, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
def UCOMISDrm: I<0x2E, MRMSrcMem, (ops V2F8:$dst, f64mem:$src),
"ucomisd {$src, $dst|$dst, $src}">, TB, OpSize;
def UCOMISSrr: I<0x2E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
def UCOMISSrr: I<0x2E, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
"ucomiss {$src, $dst|$dst, $src}">, TB;
def UCOMISSrm: I<0x2E, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
def UCOMISSrm: I<0x2E, MRMSrcMem, (ops V4F4:$dst, f32mem:$src),
"ucomiss {$src, $dst|$dst, $src}">, TB;
// Pseudo-instructions that map fld0 to xorps/xorpd for SSE.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def FLD0SS : I<0x57, MRMSrcReg, (ops RXMM:$dst),
def FLD0SS : I<0x57, MRMSrcReg, (ops V4F4:$dst),
"xorps $dst, $dst">, TB;
def FLD0SD : I<0x57, MRMSrcReg, (ops RXMM:$dst),
def FLD0SD : I<0x57, MRMSrcReg, (ops V2F8:$dst),
"xorpd $dst, $dst">, TB, OpSize;
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ADDSSrr : I<0x58, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ADDSSrr : I<0x58, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"addss {$src, $dst|$dst, $src}">, XS;
def ADDSDrr : I<0x58, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ADDSDrr : I<0x58, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"addsd {$src, $dst|$dst, $src}">, XD;
def ANDPSrr : I<0x54, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"andps {$src, $dst|$dst, $src}">, TB;
def ANDPDrr : I<0x54, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"andpd {$src, $dst|$dst, $src}">, TB, OpSize;
def MULSSrr : I<0x59, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def MULSSrr : I<0x59, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"mulss {$src, $dst|$dst, $src}">, XS;
def MULSDrr : I<0x59, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def MULSDrr : I<0x59, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"mulsd {$src, $dst|$dst, $src}">, XD;
def ORPSrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ORPSrr : I<0x56, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"orps {$src, $dst|$dst, $src}">, TB;
def ORPDrr : I<0x56, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ORPDrr : I<0x56, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"orpd {$src, $dst|$dst, $src}">, TB, OpSize;
def XORPSrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def XORPSrr : I<0x57, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"xorps {$src, $dst|$dst, $src}">, TB;
def XORPDrr : I<0x57, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def XORPDrr : I<0x57, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"xorpd {$src, $dst|$dst, $src}">, TB, OpSize;
}
def ANDNPSrr : I<0x55, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"andnps {$src, $dst|$dst, $src}">, TB;
def ANDNPDrr : I<0x55, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"andnpd {$src, $dst|$dst, $src}">, TB, OpSize;
def ADDSSrm : I<0x58, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
def ADDSSrm : I<0x58, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
"addss {$src, $dst|$dst, $src}">, XS;
def ADDSDrm : I<0x58, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
def ADDSDrm : I<0x58, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
"addsd {$src, $dst|$dst, $src}">, XD;
def MULSSrm : I<0x59, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
def MULSSrm : I<0x59, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
"mulss {$src, $dst|$dst, $src}">, XS;
def MULSDrm : I<0x59, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
def MULSDrm : I<0x59, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
"mulsd {$src, $dst|$dst, $src}">, XD;
def DIVSSrm : I<0x5E, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
def DIVSSrm : I<0x5E, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
"divss {$src, $dst|$dst, $src}">, XS;
def DIVSSrr : I<0x5E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def DIVSSrr : I<0x5E, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"divss {$src, $dst|$dst, $src}">, XS;
def DIVSDrm : I<0x5E, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
def DIVSDrm : I<0x5E, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
"divsd {$src, $dst|$dst, $src}">, XD;
def DIVSDrr : I<0x5E, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def DIVSDrr : I<0x5E, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"divsd {$src, $dst|$dst, $src}">, XD;
def SUBSSrm : I<0x5C, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f32mem:$src),
def SUBSSrm : I<0x5C, MRMSrcMem, (ops V4F4:$dst, V4F4:$src1, f32mem:$src),
"subss {$src, $dst|$dst, $src}">, XS;
def SUBSSrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def SUBSSrr : I<0x5C, MRMSrcReg, (ops V4F4:$dst, V4F4:$src1, V4F4:$src),
"subss {$src, $dst|$dst, $src}">, XS;
def SUBSDrm : I<0x5C, MRMSrcMem, (ops RXMM:$dst, RXMM:$src1, f64mem:$src),
def SUBSDrm : I<0x5C, MRMSrcMem, (ops V2F8:$dst, V2F8:$src1, f64mem:$src),
"subsd {$src, $dst|$dst, $src}">, XD;
def SUBSDrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
def SUBSDrr : I<0x5C, MRMSrcReg, (ops V2F8:$dst, V2F8:$src1, V2F8:$src),
"subsd {$src, $dst|$dst, $src}">, XD;
def CMPSSrr : I<0xC2, MRMSrcReg,
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
(ops V4F4:$dst, V4F4:$src1, V4F4:$src, SSECC:$cc),
"cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
def CMPSSrm : I<0xC2, MRMSrcMem,
(ops RXMM:$dst, RXMM:$src1, f32mem:$src, SSECC:$cc),
(ops V4F4:$dst, V4F4:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
def CMPSDrr : I<0xC2, MRMSrcReg,
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
(ops V2F8:$dst, V2F8:$src1, V2F8:$src, SSECC:$cc),
"cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
def CMPSDrm : I<0xC2, MRMSrcMem,
(ops RXMM:$dst, RXMM:$src1, f64mem:$src, SSECC:$cc),
(ops V2F8:$dst, V2F8:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
}

View File

@ -57,7 +57,9 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = X86::MOV16mr;
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
Opc = X86::FST64m;
} else if (RC == &X86::RXMMRegClass) {
} else if (RC == &X86::V4F4RegClass) {
Opc = X86::MOVSSmr;
} else if (RC == &X86::V2F8RegClass) {
Opc = X86::MOVSDmr;
} else {
assert(0 && "Unknown regclass");
@ -79,7 +81,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opc = X86::MOV16rm;
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
Opc = X86::FLD64m;
} else if (RC == &X86::RXMMRegClass) {
} else if (RC == &X86::V4F4RegClass) {
Opc = X86::MOVSSrm;
} else if (RC == &X86::V2F8RegClass) {
Opc = X86::MOVSDrm;
} else {
assert(0 && "Unknown regclass");
@ -101,8 +105,10 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = X86::MOV16rr;
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
Opc = X86::FpMOV;
} else if (RC == &X86::RXMMRegClass) {
Opc = X86::MOVAPDrr;
} else if (RC == &X86::V4F4RegClass) {
Opc = X86::MOVSSrr;
} else if (RC == &X86::V2F8RegClass) {
Opc = X86::MOVSDrr;
} else {
assert(0 && "Unknown regclass");
abort();
@ -119,7 +125,7 @@ unsigned X86RegisterInfo::isLoadFromStackSlot(MachineInstr *MI,
case X86::MOV32rm:
case X86::FLD64m:
case X86::FLD80m:
case X86::MOVAPDrm:
case X86::MOVSSrm:
case X86::MOVSDrm:
if (MI->getOperand(1).isFrameIndex() && MI->getOperand(2).isImmediate() &&
MI->getOperand(3).isRegister() && MI->getOperand(4).isImmediate() &&

View File

@ -104,10 +104,11 @@ def R32 : RegisterClass<"X86", i32, 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]
}];
}
// FIXME: These registers can contain both integer and fp values. We should
// figure out the right way to deal with that. For now, since they'll be used
// for scalar FP, they are being declared f64
def RXMM : RegisterClass<"X86", f64, 32,
// V4F4 (the 4 x f32 class) and V2F8 (the 2 x f64 class) are the register
// classes we use for scalar SSE2 floating point support.
def V4F4 : RegisterClass<"X86", f32, 32,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
def V2F8 : RegisterClass<"X86", f64, 64,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
// FIXME: This sets up the floating point register files as though they are f64