mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
Get closer to fully working scalar FP in SSE regs. This gets singlesource
working, and Olden/power.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22441 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6c7cb29038
commit
16b04f3d5e
@ -61,7 +61,7 @@ def IntelAsmWriter : AsmWriter {
|
|||||||
|
|
||||||
def X86 : Target {
|
def X86 : Target {
|
||||||
// Specify the callee saved registers.
|
// Specify the callee saved registers.
|
||||||
let CalleeSavedRegisters = [ESI, EDI, EBX, EBP, XMM4, XMM5, XMM6, XMM7];
|
let CalleeSavedRegisters = [ESI, EDI, EBX, EBP];
|
||||||
|
|
||||||
// Yes, pointers are 32-bits in size.
|
// Yes, pointers are 32-bits in size.
|
||||||
let PointerType = i32;
|
let PointerType = i32;
|
||||||
|
@ -1687,9 +1687,9 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
|
|||||||
/*missing*/0, /*missing*/0, X86::FCMOVB , X86::FCMOVBE,
|
/*missing*/0, /*missing*/0, X86::FCMOVB , X86::FCMOVBE,
|
||||||
X86::FCMOVA , X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
|
X86::FCMOVA , X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
|
||||||
};
|
};
|
||||||
static const unsigned SSE_CMOVTAB[] = {
|
static const int SSE_CMOVTAB[] = {
|
||||||
0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */,
|
0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */,
|
||||||
2 /* CMPLESS */, 1 /* CMPLTSS */, /*missing*/0, /*missing*/0,
|
1 /* CMPLTSS */, 2 /* CMPLESS */, /*missing*/0, /*missing*/0,
|
||||||
/*missing*/0, /*missing*/0, /*missing*/0, /*missing*/0
|
/*missing*/0, /*missing*/0, /*missing*/0, /*missing*/0
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1761,33 +1761,12 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
|
|||||||
// There's no SSE equivalent of FCMOVE. In some cases we can fake it up, in
|
// There's no SSE equivalent of FCMOVE. In some cases we can fake it up, in
|
||||||
// Others we will have to do the PowerPC thing and generate an MBB for the
|
// Others we will have to do the PowerPC thing and generate an MBB for the
|
||||||
// true and false values and select between them with a PHI.
|
// true and false values and select between them with a PHI.
|
||||||
if (X86ScalarSSE) {
|
if (X86ScalarSSE && (SVT == MVT::f32 || SVT == MVT::f64)) {
|
||||||
if (CondCode != NOT_SET) {
|
if (0 && CondCode != NOT_SET) {
|
||||||
unsigned CMPSOpc = (SVT == MVT::f64) ? X86::CMPSDrr : X86::CMPSSrr;
|
// FIXME: check for min and max
|
||||||
unsigned CMPSImm = SSE_CMOVTAB[CondCode];
|
|
||||||
// FIXME check for min
|
|
||||||
// FIXME check for max
|
|
||||||
// FIXME check for reverse
|
|
||||||
unsigned LHS = SelectExpr(Cond.getOperand(0));
|
|
||||||
unsigned RHS = SelectExpr(Cond.getOperand(1));
|
|
||||||
// emit compare mask
|
|
||||||
unsigned MaskReg = MakeReg(SVT);
|
|
||||||
BuildMI(BB, CMPSOpc, 3, MaskReg).addReg(LHS).addReg(RHS).addImm(CMPSImm);
|
|
||||||
// emit and with mask
|
|
||||||
unsigned TrueMask = MakeReg(SVT);
|
|
||||||
unsigned AndOpc = (SVT == MVT::f32) ? X86::ANDPSrr : X86::ANDPDrr;
|
|
||||||
BuildMI(BB, AndOpc, 2, TrueMask).addReg(RTrue).addReg(MaskReg);
|
|
||||||
// emit and with inverse mask
|
|
||||||
unsigned FalseMask = MakeReg(SVT);
|
|
||||||
unsigned AndnOpc = (SVT == MVT::f32) ? X86::ANDNPSrr : X86::ANDNPDrr;
|
|
||||||
BuildMI(BB, AndnOpc, 2, FalseMask).addReg(RFalse).addReg(MaskReg);
|
|
||||||
// emit or into dest reg
|
|
||||||
unsigned OROpc = (SVT == MVT::f32) ? X86::ORPSrr : X86::ORPDrr;
|
|
||||||
BuildMI(BB, OROpc, 2, RDest).addReg(TrueMask).addReg(FalseMask);
|
|
||||||
return;
|
|
||||||
} else {
|
} else {
|
||||||
// do the test and branch thing
|
// FIXME: emit a direct compare and branch rather than setting a cond reg
|
||||||
// Get the condition into the zero flag.
|
// and testing it.
|
||||||
unsigned CondReg = SelectExpr(Cond);
|
unsigned CondReg = SelectExpr(Cond);
|
||||||
BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
|
BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
|
||||||
|
|
||||||
@ -2184,6 +2163,11 @@ unsigned ISel::SelectExpr(SDOperand N) {
|
|||||||
Tmp1 = SelectExpr(N.getOperand(0));
|
Tmp1 = SelectExpr(N.getOperand(0));
|
||||||
BuildMI(BB, X86::CVTSS2SDrr, 1, Result).addReg(Tmp1);
|
BuildMI(BB, X86::CVTSS2SDrr, 1, Result).addReg(Tmp1);
|
||||||
return Result;
|
return Result;
|
||||||
|
case ISD::FP_ROUND:
|
||||||
|
assert(X86ScalarSSE && "Scalar SSE FP must be enabled to use f32");
|
||||||
|
Tmp1 = SelectExpr(N.getOperand(0));
|
||||||
|
BuildMI(BB, X86::CVTSD2SSrr, 1, Result).addReg(Tmp1);
|
||||||
|
return Result;
|
||||||
case ISD::CopyFromReg:
|
case ISD::CopyFromReg:
|
||||||
Select(N.getOperand(0));
|
Select(N.getOperand(0));
|
||||||
if (Result == 1) {
|
if (Result == 1) {
|
||||||
@ -2482,9 +2466,9 @@ unsigned ISel::SelectExpr(SDOperand N) {
|
|||||||
// CVTSD2SI instructions.
|
// CVTSD2SI instructions.
|
||||||
if (ISD::FP_TO_SINT == N.getOpcode() && X86ScalarSSE) {
|
if (ISD::FP_TO_SINT == N.getOpcode() && X86ScalarSSE) {
|
||||||
if (MVT::f32 == N.getOperand(0).getValueType()) {
|
if (MVT::f32 == N.getOperand(0).getValueType()) {
|
||||||
BuildMI(BB, X86::CVTSS2SIrr, 1, Result).addReg(Tmp1);
|
BuildMI(BB, X86::CVTTSS2SIrr, 1, Result).addReg(Tmp1);
|
||||||
} else if (MVT::f64 == N.getOperand(0).getValueType()) {
|
} else if (MVT::f64 == N.getOperand(0).getValueType()) {
|
||||||
BuildMI(BB, X86::CVTSD2SIrr, 1, Result).addReg(Tmp1);
|
BuildMI(BB, X86::CVTTSD2SIrr, 1, Result).addReg(Tmp1);
|
||||||
} else {
|
} else {
|
||||||
assert(0 && "Not an f32 or f64?");
|
assert(0 && "Not an f32 or f64?");
|
||||||
abort();
|
abort();
|
||||||
@ -4485,8 +4469,18 @@ void ISel::Select(SDOperand N) {
|
|||||||
SelectAddress(N.getOperand(2), AM);
|
SelectAddress(N.getOperand(2), AM);
|
||||||
Select(N.getOperand(0));
|
Select(N.getOperand(0));
|
||||||
}
|
}
|
||||||
addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),
|
GlobalValue *GV = GA->getGlobal();
|
||||||
AM).addGlobalAddress(GA->getGlobal());
|
// For Darwin, external and weak symbols are indirect, so we want to load
|
||||||
|
// the value at address GV, not the value of GV itself.
|
||||||
|
if (Subtarget->getIndirectExternAndWeakGlobals() &&
|
||||||
|
(GV->hasWeakLinkage() || GV->isExternal())) {
|
||||||
|
Tmp1 = MakeReg(MVT::i32);
|
||||||
|
BuildMI(BB, X86::MOV32rm, 4, Tmp1).addReg(0).addZImm(1).addReg(0)
|
||||||
|
.addGlobalAddress(GV, false, 0);
|
||||||
|
addFullAddress(BuildMI(BB, X86::MOV32mr, 4+1),AM).addReg(Tmp1);
|
||||||
|
} else {
|
||||||
|
addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),AM).addGlobalAddress(GV);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,6 +20,9 @@ class X86MemOperand<ValueType Ty> : Operand<Ty> {
|
|||||||
let NumMIOperands = 4;
|
let NumMIOperands = 4;
|
||||||
let PrintMethod = "printMemoryOperand";
|
let PrintMethod = "printMemoryOperand";
|
||||||
}
|
}
|
||||||
|
def SSECC : Operand<i8> {
|
||||||
|
let PrintMethod = "printSSECC";
|
||||||
|
}
|
||||||
|
|
||||||
def i8mem : X86MemOperand<i8>;
|
def i8mem : X86MemOperand<i8>;
|
||||||
def i16mem : X86MemOperand<i16>;
|
def i16mem : X86MemOperand<i16>;
|
||||||
@ -188,7 +191,7 @@ def JG : IBr<0x8F, (ops i32imm:$dst), "jg $dst">, TB;
|
|||||||
let isCall = 1 in
|
let isCall = 1 in
|
||||||
// All calls clobber the non-callee saved registers...
|
// All calls clobber the non-callee saved registers...
|
||||||
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
|
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
|
||||||
XMM0, XMM1, XMM2, XMM3] in {
|
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in {
|
||||||
def CALLpcrel32 : I<0xE8, RawFrm, (ops calltarget:$dst), "call $dst">;
|
def CALLpcrel32 : I<0xE8, RawFrm, (ops calltarget:$dst), "call $dst">;
|
||||||
def CALL32r : I<0xFF, MRM2r, (ops R32:$dst), "call {*}$dst">;
|
def CALL32r : I<0xFF, MRM2r, (ops R32:$dst), "call {*}$dst">;
|
||||||
def CALL32m : I<0xFF, MRM2m, (ops i32mem:$dst), "call {*}$dst">;
|
def CALL32m : I<0xFF, MRM2m, (ops i32mem:$dst), "call {*}$dst">;
|
||||||
@ -1425,17 +1428,21 @@ def MOVAPDrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
|
|||||||
def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
|
def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
|
||||||
"movapd {$src, $dst|$dst, $src}">, TB, OpSize;
|
"movapd {$src, $dst|$dst, $src}">, TB, OpSize;
|
||||||
|
|
||||||
def CVTSD2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
||||||
"cvtsd2si {$src, $dst|$dst, $src}">, XD;
|
"cvttsd2si {$src, $dst|$dst, $src}">, XD;
|
||||||
def CVTSD2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f64mem:$src),
|
def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
|
||||||
"cvtsd2si {$src, $dst|$dst, $src}">, XD;
|
"cvttsd2si {$src, $dst|$dst, $src}">, XD;
|
||||||
def CVTSS2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
||||||
"cvtss2si {$src, $dst|$dst, $src}">, XS;
|
"cvttss2si {$src, $dst|$dst, $src}">, XS;
|
||||||
def CVTSS2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
|
def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
|
||||||
"cvtss2si {$src, $dst|$dst, $src}">, XS;
|
"cvttss2si {$src, $dst|$dst, $src}">, XS;
|
||||||
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
|
||||||
|
"cvtsd2ss {$src, $dst|$dst, $src}">, XS;
|
||||||
|
def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
|
||||||
|
"cvtsd2ss {$src, $dst|$dst, $src}">, XS;
|
||||||
|
def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
|
||||||
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
|
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
|
||||||
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops R32:$dst, f32mem:$src),
|
def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
|
||||||
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
|
"cvtss2sd {$src, $dst|$dst, $src}">, XD;
|
||||||
def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
|
||||||
"cvtsi2ss {$src, $dst|$dst, $src}">, XS;
|
"cvtsi2ss {$src, $dst|$dst, $src}">, XS;
|
||||||
@ -1515,17 +1522,17 @@ def SUBSDrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
|
|||||||
"subsd {$src, $dst|$dst, $src}">, XD;
|
"subsd {$src, $dst|$dst, $src}">, XD;
|
||||||
|
|
||||||
def CMPSSrr : I<0xC2, MRMSrcReg,
|
def CMPSSrr : I<0xC2, MRMSrcReg,
|
||||||
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred),
|
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
|
||||||
"cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS;
|
"cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
|
||||||
def CMPSSrm : I<0xC2, MRMSrcMem,
|
def CMPSSrm : I<0xC2, MRMSrcMem,
|
||||||
(ops RXMM:$dst, RXMM:$src1, f32mem:$src, i8imm:$pred),
|
(ops RXMM:$dst, RXMM:$src1, f32mem:$src, SSECC:$cc),
|
||||||
"cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS;
|
"cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
|
||||||
def CMPSDrr : I<0xC2, MRMSrcReg,
|
def CMPSDrr : I<0xC2, MRMSrcReg,
|
||||||
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred),
|
(ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
|
||||||
"cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD;
|
"cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
|
||||||
def CMPSDrm : I<0xC2, MRMSrcMem,
|
def CMPSDrm : I<0xC2, MRMSrcMem,
|
||||||
(ops RXMM:$dst, RXMM:$src1, f64mem:$src, i8imm:$pred),
|
(ops RXMM:$dst, RXMM:$src1, f64mem:$src, SSECC:$cc),
|
||||||
"cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD;
|
"cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
Loading…
Reference in New Issue
Block a user