mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-04 06:09:05 +00:00
Make the use of the vmla and vmls VFP instructions controllable via the command line.
Preliminary testing shows significant performance wins by not using these instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99436 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
044be39090
commit
2676737e5e
@ -1369,6 +1369,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
|
||||
let Inst{4} = op4;
|
||||
}
|
||||
|
||||
// Double precision, binary, VML[AS] (for additional predicate)
// Built on VFPAI with VFPBinaryFrm; fills encoding bits from opcod1, opcod2,
// op6 and op4, and sets Predicates so that instructions defined with this
// class require UseVMLx in addition to HasVFP2. Used by the VMLAD, VMLSD,
// VNMLAD and VNMLSD definitions.
|
||||
class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
|
||||
dag iops, InstrItinClass itin, string opc, string asm,
|
||||
list<dag> pattern>
|
||||
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
|
||||
let Inst{27-23} = opcod1;
|
||||
let Inst{21-20} = opcod2;
|
||||
let Inst{11-8} = 0b1011;
|
||||
let Inst{6} = op6;
|
||||
let Inst{4} = op4;
|
||||
// Require both HasVFP2 and the UseVMLx predicate (Subtarget->useVMLx()).
list<Predicate> Predicates = [HasVFP2, UseVMLx];
|
||||
}
|
||||
|
||||
|
||||
// Single precision, unary
|
||||
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
|
||||
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
|
||||
|
@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
|
||||
def UseMovt : Predicate<"Subtarget->useMovt()">;
|
||||
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
|
||||
|
||||
// UseVMLx holds when Subtarget->useVMLx() evaluates to true; it is listed in
// the Predicates of the ADbI_vmlX instruction class.
def UseVMLx : Predicate<"Subtarget->useVMLx()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM Flag Definitions.
|
||||
|
||||
|
@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
|
||||
// FP FMA Operations.
|
||||
//
|
||||
|
||||
def VMLAD : ADbI<0b11100, 0b00, 0, 0,
|
||||
def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
|
||||
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
|
||||
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
|
||||
@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
|
||||
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
|
||||
RegConstraint<"$dstin = $dst">;
|
||||
|
||||
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
|
||||
def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
|
||||
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
|
||||
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
|
||||
@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
|
||||
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
|
||||
RegConstraint<"$dstin = $dst">;
|
||||
|
||||
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
|
||||
def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
|
||||
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
|
||||
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
|
||||
@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
|
||||
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
|
||||
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
|
||||
|
||||
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
|
||||
def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
|
||||
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
|
||||
IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
|
||||
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
|
||||
|
@ -26,6 +26,10 @@ static cl::opt<bool>
|
||||
UseNEONFP("arm-use-neon-fp",
|
||||
cl::desc("Use NEON for single-precision FP"),
|
||||
cl::init(false), cl::Hidden);
|
||||
// Boolean command-line option "arm-use-vmlx" (declared cl::Hidden, initial
// value true). The ARMSubtarget constructor copies it into the UseVMLx member.
static cl::opt<bool>
|
||||
UseVMLxInstructions("arm-use-vmlx",
|
||||
cl::desc("Use VFP vmla and vmls instructions"),
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
UseMOVT("arm-use-movt",
|
||||
@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
|
||||
: ARMArchVersion(V4)
|
||||
, ARMFPUType(None)
|
||||
, UseNEONForSinglePrecisionFP(UseNEONFP)
|
||||
, UseVMLx(UseVMLxInstructions)
|
||||
, IsThumb(isT)
|
||||
, ThumbMode(Thumb1)
|
||||
, PostRAScheduler(false)
|
||||
|
@ -50,6 +50,10 @@ protected:
|
||||
/// determine if NEON should actually be used.
|
||||
bool UseNEONForSinglePrecisionFP;
|
||||
|
||||
/// UseVMLx - If the VFP2 instructions are available, indicates whether
|
||||
/// the VML[AS] instructions should be used.
|
||||
bool UseVMLx;
|
||||
|
||||
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
|
||||
bool IsThumb;
|
||||
|
||||
@ -119,6 +123,7 @@ protected:
|
||||
bool hasNEON() const { return ARMFPUType >= NEON; }
|
||||
bool useNEONForSinglePrecisionFP() const {
|
||||
return hasNEON() && UseNEONForSinglePrecisionFP; }
|
||||
/// useVMLx - True only when hasVFP2() holds and the UseVMLx flag is set.
bool useVMLx() const {
  return hasVFP2() && UseVMLx;
}
|
||||
|
||||
bool hasFP16() const { return HasFP16; }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user