Mirror of https://github.com/c64scene-ar/llvm-6502.git
Get closer to fully working scalar FP in SSE regs. This gets SingleSource
working, and Olden/power.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22441 91177308-0d34-0410-b5e6-96231b3b80d8
@@ -61,7 +61,7 @@ def IntelAsmWriter : AsmWriter {
 
 def X86 : Target {
   // Specify the callee saved registers.
-  let CalleeSavedRegisters = [ESI, EDI, EBX, EBP, XMM4, XMM5, XMM6, XMM7];
+  let CalleeSavedRegisters = [ESI, EDI, EBX, EBP];
 
   // Yes, pointers are 32-bits in size.
   let PointerType = i32;

@@ -1687,9 +1687,9 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
     /*missing*/0,  /*missing*/0, X86::FCMOVB , X86::FCMOVBE,
     X86::FCMOVA ,  X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
   };
-  static const unsigned SSE_CMOVTAB[] = {
+  static const int SSE_CMOVTAB[] = {
     0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */,
-    2 /* CMPLESS */, 1 /* CMPLTSS */, /*missing*/0, /*missing*/0,
+    1 /* CMPLTSS */, 2 /* CMPLESS */, /*missing*/0, /*missing*/0,
     /*missing*/0,  /*missing*/0, /*missing*/0, /*missing*/0
   };
 
@@ -1761,33 +1761,12 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
   // There's no SSE equivalent of FCMOVE.  In some cases we can fake it up, in
   // Others we will have to do the PowerPC thing and generate an MBB for the
   // true and false values and select between them with a PHI.
-  if (X86ScalarSSE) {
-    if (CondCode != NOT_SET) {
-      unsigned CMPSOpc = (SVT == MVT::f64) ? X86::CMPSDrr : X86::CMPSSrr;
-      unsigned CMPSImm = SSE_CMOVTAB[CondCode];
-      // FIXME check for min
-      // FIXME check for max
-      // FIXME check for reverse
-      unsigned LHS = SelectExpr(Cond.getOperand(0));
-      unsigned RHS = SelectExpr(Cond.getOperand(1));
-      // emit compare mask
-      unsigned MaskReg = MakeReg(SVT);
-      BuildMI(BB, CMPSOpc, 3, MaskReg).addReg(LHS).addReg(RHS).addImm(CMPSImm);
-      // emit and with mask
-      unsigned TrueMask = MakeReg(SVT);
-      unsigned AndOpc = (SVT == MVT::f32) ? X86::ANDPSrr : X86::ANDPDrr;
-      BuildMI(BB, AndOpc, 2, TrueMask).addReg(RTrue).addReg(MaskReg);
-      // emit and with inverse mask
-      unsigned FalseMask = MakeReg(SVT);
-      unsigned AndnOpc = (SVT == MVT::f32) ? X86::ANDNPSrr : X86::ANDNPDrr;
-      BuildMI(BB, AndnOpc, 2, FalseMask).addReg(RFalse).addReg(MaskReg);
-      // emit or into dest reg
-      unsigned OROpc = (SVT == MVT::f32) ? X86::ORPSrr : X86::ORPDrr;
-      BuildMI(BB, OROpc, 2, RDest).addReg(TrueMask).addReg(FalseMask);
-      return;
+  if (X86ScalarSSE && (SVT == MVT::f32 || SVT == MVT::f64)) {
+    if (0 && CondCode != NOT_SET) {
+      // FIXME: check for min and max
     } else {
-      // do the test and branch thing
-      // Get the condition into the zero flag.
+      // FIXME: emit a direct compare and branch rather than setting a cond reg
+      //        and testing it.
       unsigned CondReg = SelectExpr(Cond);
       BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
 
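
The block removed above built the classic branchless SSE select: CMPSS produces an all-ones or all-zeros mask, the true value is ANDed with the mask, the false value is ANDed with the complemented mask (ANDNPS), and the two halves are ORed into the destination. A minimal sketch of that idiom with SSE intrinsics (illustrative only, not code from this commit):

#include <xmmintrin.h>

// Branchless scalar select: returns t when a < b, otherwise f.
// Mirrors the CMPSS / ANDPS / ANDNPS / ORPS sequence the removed code emitted.
static float select_if_less(float a, float b, float t, float f) {
  __m128 mask   = _mm_cmplt_ss(_mm_set_ss(a), _mm_set_ss(b)); // lane 0: all ones or all zeros
  __m128 keep_t = _mm_and_ps(mask, _mm_set_ss(t));            // t where the mask is set
  __m128 keep_f = _mm_andnot_ps(mask, _mm_set_ss(f));         // f where the mask is clear
  return _mm_cvtss_f32(_mm_or_ps(keep_t, keep_f));            // merge the two halves
}
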
@@ -2184,6 +2163,11 @@ unsigned ISel::SelectExpr(SDOperand N) {
     Tmp1 = SelectExpr(N.getOperand(0));
     BuildMI(BB, X86::CVTSS2SDrr, 1, Result).addReg(Tmp1);
     return Result;
+  case ISD::FP_ROUND:
+    assert(X86ScalarSSE && "Scalar SSE FP must be enabled to use f32");
+    Tmp1 = SelectExpr(N.getOperand(0));
+    BuildMI(BB, X86::CVTSD2SSrr, 1, Result).addReg(Tmp1);
+    return Result;
   case ISD::CopyFromReg:
     Select(N.getOperand(0));
     if (Result == 1) {
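
The new ISD::FP_ROUND case is the counterpart of the ISD::FP_EXTEND handling just above it: f32 to f64 goes through cvtss2sd, and f64 back down to f32 goes through the cvtsd2ss instruction added to the .td file later in this patch. In C terms this is simply a (float) cast of a double; an intrinsic-level sketch of the operation (not commit code):

#include <emmintrin.h>

// double -> float narrowing, the operation ISD::FP_ROUND now selects to cvtsd2ss.
static float fp_round_to_float(double d) {
  // _mm_cvtsd_ss converts the low double of its second operand and writes the
  // result into lane 0 of the first operand; the upper lanes are irrelevant here.
  return _mm_cvtss_f32(_mm_cvtsd_ss(_mm_setzero_ps(), _mm_set_sd(d)));
}
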
@@ -2482,9 +2466,9 @@ unsigned ISel::SelectExpr(SDOperand N) {
     // CVTSD2SI instructions.
     if (ISD::FP_TO_SINT == N.getOpcode() && X86ScalarSSE) {
       if (MVT::f32 == N.getOperand(0).getValueType()) {
-        BuildMI(BB, X86::CVTSS2SIrr, 1, Result).addReg(Tmp1);
+        BuildMI(BB, X86::CVTTSS2SIrr, 1, Result).addReg(Tmp1);
       } else if (MVT::f64 == N.getOperand(0).getValueType()) {
-        BuildMI(BB, X86::CVTSD2SIrr, 1, Result).addReg(Tmp1);
+        BuildMI(BB, X86::CVTTSD2SIrr, 1, Result).addReg(Tmp1);
       } else {
         assert(0 && "Not an f32 or f64?");
         abort();
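
Switching to the truncating forms matters because ISD::FP_TO_SINT has C cast semantics: round toward zero regardless of the current rounding mode. That is what cvttss2si/cvttsd2si do, whereas cvtss2si/cvtsd2si honor MXCSR and round to nearest by default. A small intrinsic example of the difference (illustrative only):

#include <xmmintrin.h>

// Why FP_TO_SINT needs the truncating conversion:
static void conversion_difference() {
  __m128 v = _mm_set_ss(2.7f);
  int truncated = _mm_cvtt_ss2si(v); // cvttss2si -> 2, truncates toward zero like a C cast
  int rounded   = _mm_cvt_ss2si(v);  // cvtss2si  -> 3 under the default round-to-nearest mode
  (void)truncated;
  (void)rounded;
}
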
@@ -4485,8 +4469,18 @@ void ISel::Select(SDOperand N) {
         SelectAddress(N.getOperand(2), AM);
         Select(N.getOperand(0));
       }
-      addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),
-                     AM).addGlobalAddress(GA->getGlobal());
+      GlobalValue *GV = GA->getGlobal();
+      // For Darwin, external and weak symbols are indirect, so we want to load
+      // the value at address GV, not the value of GV itself.
+      if (Subtarget->getIndirectExternAndWeakGlobals() &&
+          (GV->hasWeakLinkage() || GV->isExternal())) {
+        Tmp1 = MakeReg(MVT::i32);
+        BuildMI(BB, X86::MOV32rm, 4, Tmp1).addReg(0).addZImm(1).addReg(0)
+          .addGlobalAddress(GV, false, 0);
+        addFullAddress(BuildMI(BB, X86::MOV32mr, 4+1),AM).addReg(Tmp1);
+      } else {
+        addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),AM).addGlobalAddress(GV);
+      }
       return;
     }
 

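The Darwin path above stores a register loaded from the symbol's indirection slot (MOV32rm followed by MOV32mr) instead of storing the symbol address as an immediate (MOV32mi), because the addresses of external and weak globals are only resolved by the dynamic linker. A rough, hypothetical C++ sketch of the idea (names invented for illustration, not commit code):

// On Darwin the address of an external or weak global is published through a
// pointer slot that the dynamic linker fills in (a "non-lazy pointer"); code
// reads that slot rather than embedding the symbol's address directly.
int some_global = 0;                  // stand-in for an external/weak global
int *non_lazy_ptr = &some_global;     // the slot dyld would populate (cf. MOV32rm above)

void store_global_address(int **dest) {
  *dest = non_lazy_ptr;               // store the loaded pointer (cf. MOV32mr above)
}
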
@@ -20,6 +20,9 @@ class X86MemOperand<ValueType Ty> : Operand<Ty> {
   let NumMIOperands = 4;
   let PrintMethod = "printMemoryOperand";
 }
+def SSECC : Operand<i8> {
+  let PrintMethod = "printSSECC";
+}
 
 def i8mem  : X86MemOperand<i8>;
 def i16mem : X86MemOperand<i16>;
@@ -188,7 +191,7 @@ def JG  : IBr<0x8F, (ops i32imm:$dst), "jg $dst">, TB;
 let isCall = 1 in
   // All calls clobber the non-callee saved registers...
   let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-              XMM0, XMM1, XMM2, XMM3] in {
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in {
     def CALLpcrel32 : I<0xE8, RawFrm, (ops calltarget:$dst), "call $dst">;
     def CALL32r     : I<0xFF, MRM2r, (ops R32:$dst), "call {*}$dst">;
     def CALL32m     : I<0xFF, MRM2m, (ops i32mem:$dst), "call {*}$dst">;
@@ -1425,17 +1428,21 @@ def MOVAPDrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
 def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src),
                 "movapd {$src, $dst|$dst, $src}">, TB, OpSize;
 
-def CVTSD2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src),
-                "cvtsd2si {$src, $dst|$dst, $src}">, XD;
-def CVTSD2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f64mem:$src),
-                "cvtsd2si {$src, $dst|$dst, $src}">, XD;
-def CVTSS2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src),
-                "cvtss2si {$src, $dst|$dst, $src}">, XS;
-def CVTSS2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src),
-                "cvtss2si {$src, $dst|$dst, $src}">, XS;
-def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+                "cvttsd2si {$src, $dst|$dst, $src}">, XD;
+def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
+                "cvttsd2si {$src, $dst|$dst, $src}">, XD;
+def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src),
+                "cvttss2si {$src, $dst|$dst, $src}">, XS;
+def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
+                "cvttss2si {$src, $dst|$dst, $src}">, XS;
+def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
+                "cvtsd2ss {$src, $dst|$dst, $src}">, XS;
+def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f64mem:$src),
+                "cvtsd2ss {$src, $dst|$dst, $src}">, XS;
+def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src),
                 "cvtss2sd {$src, $dst|$dst, $src}">, XD;
-def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops R32:$dst, f32mem:$src),
+def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f32mem:$src),
                 "cvtss2sd {$src, $dst|$dst, $src}">, XD;
 def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops R32:$dst, RXMM:$src),
                 "cvtsi2ss {$src, $dst|$dst, $src}">, XS;
@@ -1515,17 +1522,17 @@ def SUBSDrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src),
                 "subsd {$src, $dst|$dst, $src}">, XD;
 
 def CMPSSrr : I<0xC2, MRMSrcReg,
-                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred),
-                "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS;
+                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
+                "cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
 def CMPSSrm : I<0xC2, MRMSrcMem,
-                (ops RXMM:$dst, RXMM:$src1, f32mem:$src, i8imm:$pred),
-                "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS;
+                (ops RXMM:$dst, RXMM:$src1, f32mem:$src, SSECC:$cc),
+                "cmp${cc}ss {$src, $dst|$dst, $src}">, XS;
 def CMPSDrr : I<0xC2, MRMSrcReg,
-                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred),
-                "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD;
+                (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc),
+                "cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
 def CMPSDrm : I<0xC2, MRMSrcMem,
-                (ops RXMM:$dst, RXMM:$src1, f64mem:$src, i8imm:$pred),
-                "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD;
+                (ops RXMM:$dst, RXMM:$src1, f64mem:$src, SSECC:$cc),
+                "cmp${cc}sd {$src, $dst|$dst, $src}">, XD;
 }
 
 //===----------------------------------------------------------------------===//
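
The SSECC operand introduced above lets the assembly printer fold the compare predicate into the mnemonic (cmpeqss, cmpltss, and so on) instead of printing a raw immediate, matching the encodings that SSE_CMOVTAB indexes. The predicate values are fixed by the instruction set; a small illustrative helper (not the actual printSSECC implementation) would map them like this:

// CMPSS/CMPSD predicate immediates as defined by the ISA; values 0, 1, 2 and 4
// are the ones SSE_CMOVTAB uses above.
static const char *SSECCSuffix(unsigned Imm) {
  static const char *const Names[8] = {
    "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord"
  };
  return Imm < 8 ? Names[Imm] : "<invalid>";
}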