mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 21:35:07 +00:00
Use COPY_TO_REGCLASS instead of pseudo instructions for Neon FP patterns.
Jakob Olesen suggested that we can avoid the need for separate pseudo instructions here by using COPY_TO_REGCLASS in the patterns. The pattern gets pretty ugly, but it seems to work well. Partial fix for Radar 8711675.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121718 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3a6756cb1c
commit
1e6f59608b
@ -54,7 +54,6 @@ namespace {
|
|||||||
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
|
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
|
||||||
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
||||||
unsigned Opc, bool IsExt, unsigned NumRegs);
|
unsigned Opc, bool IsExt, unsigned NumRegs);
|
||||||
void ExpandNeonSFP2(MachineBasicBlock::iterator &MBBI, unsigned Opc);
|
|
||||||
};
|
};
|
||||||
char ARMExpandPseudo::ID = 0;
|
char ARMExpandPseudo::ID = 0;
|
||||||
}
|
}
|
||||||
@ -613,21 +612,6 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
|||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ExpandNeonSFP2 - Translate a 2-register Neon pseudo instruction used for
|
|
||||||
/// scalar floating-point to a real instruction.
|
|
||||||
void ARMExpandPseudo::ExpandNeonSFP2(MachineBasicBlock::iterator &MBBI,
|
|
||||||
unsigned Opc) {
|
|
||||||
MachineInstr &MI = *MBBI;
|
|
||||||
MachineBasicBlock &MBB = *MI.getParent();
|
|
||||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
|
||||||
MIB.addOperand(MI.getOperand(0)) // destination register
|
|
||||||
.addOperand(MI.getOperand(1)) // source register
|
|
||||||
.addOperand(MI.getOperand(2)) // predicate
|
|
||||||
.addOperand(MI.getOperand(3)); // predicate register
|
|
||||||
TransferImpOps(MI, MIB, MIB);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
|
|
||||||
@ -1167,13 +1151,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break;
|
case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break;
|
||||||
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break;
|
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break;
|
||||||
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break;
|
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break;
|
||||||
|
|
||||||
case ARM::VABSfd_sfp: ExpandNeonSFP2(MBBI, ARM::VABSfd); break;
|
|
||||||
case ARM::VNEGfd_sfp: ExpandNeonSFP2(MBBI, ARM::VNEGfd); break;
|
|
||||||
case ARM::VCVTf2sd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTf2sd); break;
|
|
||||||
case ARM::VCVTf2ud_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTf2ud); break;
|
|
||||||
case ARM::VCVTs2fd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTs2fd); break;
|
|
||||||
case ARM::VCVTu2fd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTu2fd); break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ModifiedOp)
|
if (ModifiedOp)
|
||||||
|
@ -1669,8 +1669,6 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
|
|||||||
|
|
||||||
// Basic 2-register operations: single-, double- and quad-register.
|
// Basic 2-register operations: single-, double- and quad-register.
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
class N2VS
|
|
||||||
: PseudoNeonI<(outs DPR_VFP2:$Vd), (ins DPR_VFP2:$Vm), IIC_VUNAD, "", []>;
|
|
||||||
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||||
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
|
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
|
||||||
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
|
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
|
||||||
@ -4678,11 +4676,14 @@ def VTBX4Pseudo
|
|||||||
// NEON instructions for single-precision FP math
|
// NEON instructions for single-precision FP math
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, PseudoNeonI Inst>
|
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
|
||||||
: NEONFPPat<(ResTy (OpNode SPR:$a)),
|
: NEONFPPat<(ResTy (OpNode SPR:$a)),
|
||||||
(EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
|
(EXTRACT_SUBREG
|
||||||
SPR:$a, ssub_0))),
|
(OpTy (COPY_TO_REGCLASS
|
||||||
ssub_0)>;
|
(OpTy (Inst (INSERT_SUBREG
|
||||||
|
(OpTy (COPY_TO_REGCLASS (OpTy (IMPLICIT_DEF)), DPR_VFP2)),
|
||||||
|
SPR:$a, ssub_0))),
|
||||||
|
DPR_VFP2)), ssub_0)>;
|
||||||
|
|
||||||
class N3VSPat<SDNode OpNode, NeonI Inst>
|
class N3VSPat<SDNode OpNode, NeonI Inst>
|
||||||
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
|
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
|
||||||
@ -4736,12 +4737,10 @@ def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>,
|
|||||||
Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
|
Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
|
||||||
|
|
||||||
// Vector Absolute used for single-precision FP
|
// Vector Absolute used for single-precision FP
|
||||||
def VABSfd_sfp : N2VS;
|
def : N2VSPat<fabs, f32, v2f32, VABSfd>;
|
||||||
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
|
|
||||||
|
|
||||||
// Vector Negate used for single-precision FP
|
// Vector Negate used for single-precision FP
|
||||||
def VNEGfd_sfp : N2VS;
|
def : N2VSPat<fneg, f32, v2f32, VNEGfd>;
|
||||||
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
|
|
||||||
|
|
||||||
// Vector Maximum used for single-precision FP
|
// Vector Maximum used for single-precision FP
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
@ -4758,17 +4757,10 @@ def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$Vd),
|
|||||||
def : N3VSPat<NEONfmin, VMINfd_sfp>;
|
def : N3VSPat<NEONfmin, VMINfd_sfp>;
|
||||||
|
|
||||||
// Vector Convert between single-precision FP and integer
|
// Vector Convert between single-precision FP and integer
|
||||||
def VCVTf2sd_sfp : N2VS;
|
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd>;
|
||||||
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
|
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud>;
|
||||||
|
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd>;
|
||||||
def VCVTf2ud_sfp : N2VS;
|
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd>;
|
||||||
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
|
|
||||||
|
|
||||||
def VCVTs2fd_sfp : N2VS;
|
|
||||||
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
|
|
||||||
|
|
||||||
def VCVTu2fd_sfp : N2VS;
|
|
||||||
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Non-Instruction Patterns
|
// Non-Instruction Patterns
|
||||||
|
Loading…
x
Reference in New Issue
Block a user