mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-30 02:32:08 +00:00
e54885af9b
in-tree implementations of TargetLoweringBase::isFMAFasterThanMulAndAdd in order to resolve the following issues with fmuladd (i.e. optional FMA) intrinsics: 1. On X86(-64) targets, ISD::FMA nodes are formed when lowering fmuladd intrinsics even if the subtarget does not support FMA instructions, leading to laughably bad code generation in some situations. 2. On AArch64 targets, ISD::FMA nodes are formed for operations on fp128, resulting in a call to a software fp128 FMA implementation. 3. On PowerPC targets, FMAs are not generated from fmuladd intrinsics on types like v2f32, v8f32, v4f64, etc., even though they promote, split, scalarize, etc. to types that support hardware FMAs. The function has also been slightly renamed for consistency and to force a merge/build conflict for any out-of-tree target implementing it. To resolve, see comments and fixed in-tree examples. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185956 91177308-0d34-0410-b5e6-96231b3b80d8
259 lines
11 KiB
C++
259 lines
11 KiB
C++
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
|
|
// selection DAG.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
|
|
#define LLVM_TARGET_AARCH64_ISELLOWERING_H
|
|
|
|
#include "Utils/AArch64BaseInfo.h"
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
|
#include "llvm/Target/TargetLowering.h"
|
|
|
|
|
|
namespace llvm {
|
|
namespace AArch64ISD {
|
|
enum NodeType {
|
|
// Start the numbering from where ISD NodeType finishes.
|
|
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
|
|
|
// This is a conditional branch which also notes the flag needed
|
|
// (eq/sgt/...). A64 puts this information on the branches rather than
|
|
// compares as LLVM does.
|
|
BR_CC,
|
|
|
|
// A node to be selected to an actual call operation: either BL or BLR in
|
|
// the absence of tail calls.
|
|
Call,
|
|
|
|
// Indicates a floating-point immediate which fits into the format required
|
|
// by the FMOV instructions. First (and only) operand is the 8-bit encoded
|
|
// value of that immediate.
|
|
FPMOV,
|
|
|
|
// Corresponds directly to an EXTR instruction. Operands are an LHS an RHS
|
|
// and an LSB.
|
|
EXTR,
|
|
|
|
// Wraps a load from the GOT, which should always be performed with a 64-bit
|
|
// load instruction. This prevents the DAG combiner folding a truncate to
|
|
// form a smaller memory access.
|
|
GOTLoad,
|
|
|
|
// Performs a bitfield insert. Arguments are: the value being inserted into;
|
|
// the value being inserted; least significant bit changed; width of the
|
|
// field.
|
|
BFI,
|
|
|
|
// Simply a convenient node inserted during ISelLowering to represent
|
|
// procedure return. Will almost certainly be selected to "RET".
|
|
Ret,
|
|
|
|
/// Extracts a field of contiguous bits from the source and sign extends
|
|
/// them into a single register. Arguments are: source; immr; imms. Note
|
|
/// these are pre-encoded since DAG matching can't cope with combining LSB
|
|
/// and Width into these values itself.
|
|
SBFX,
|
|
|
|
/// This is an A64-ification of the standard LLVM SELECT_CC operation. The
|
|
/// main difference is that it only has the values and an A64 condition,
|
|
/// which will be produced by a setcc instruction.
|
|
SELECT_CC,
|
|
|
|
/// This serves most of the functions of the LLVM SETCC instruction, for two
|
|
/// purposes. First, it prevents optimisations from fiddling with the
|
|
/// compare after we've moved the CondCode information onto the SELECT_CC or
|
|
/// BR_CC instructions. Second, it gives a legal instruction for the actual
|
|
/// comparison.
|
|
///
|
|
/// It keeps a record of the condition flags asked for because certain
|
|
/// instructions are only valid for a subset of condition codes.
|
|
SETCC,
|
|
|
|
// Designates a node which is a tail call: both a call and a return
|
|
// instruction as far as selction is concerned. It should be selected to an
|
|
// unconditional branch. Has the usual plethora of call operands, but: 1st
|
|
// is callee, 2nd is stack adjustment required immediately before branch.
|
|
TC_RETURN,
|
|
|
|
// Designates a call used to support the TLS descriptor ABI. The call itself
|
|
// will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
|
|
// var") must be attached somehow during code generation. It takes two
|
|
// operands: the callee and the symbol to be relocated against.
|
|
TLSDESCCALL,
|
|
|
|
// Leaf node which will be lowered to an appropriate MRS to obtain the
|
|
// thread pointer: TPIDR_EL0.
|
|
THREAD_POINTER,
|
|
|
|
/// Extracts a field of contiguous bits from the source and zero extends
|
|
/// them into a single register. Arguments are: source; immr; imms. Note
|
|
/// these are pre-encoded since DAG matching can't cope with combining LSB
|
|
/// and Width into these values itself.
|
|
UBFX,
|
|
|
|
// Wraps an address which the ISelLowering phase has decided should be
|
|
// created using the large memory model style: i.e. a sequence of four
|
|
// movz/movk instructions.
|
|
WrapperLarge,
|
|
|
|
// Wraps an address which the ISelLowering phase has decided should be
|
|
// created using the small memory model style: i.e. adrp/add or
|
|
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
|
|
// get selected.
|
|
WrapperSmall
|
|
};
|
|
}
|
|
|
|
|
|
class AArch64Subtarget;
|
|
class AArch64TargetMachine;
|
|
|
|
class AArch64TargetLowering : public TargetLowering {
|
|
public:
|
|
explicit AArch64TargetLowering(AArch64TargetMachine &TM);
|
|
|
|
const char *getTargetNodeName(unsigned Opcode) const;
|
|
|
|
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
|
|
|
|
SDValue LowerFormalArguments(SDValue Chain,
|
|
CallingConv::ID CallConv, bool isVarArg,
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
SDLoc dl, SelectionDAG &DAG,
|
|
SmallVectorImpl<SDValue> &InVals) const;
|
|
|
|
SDValue LowerReturn(SDValue Chain,
|
|
CallingConv::ID CallConv, bool isVarArg,
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
const SmallVectorImpl<SDValue> &OutVals,
|
|
SDLoc dl, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCall(CallLoweringInfo &CLI,
|
|
SmallVectorImpl<SDValue> &InVals) const;
|
|
|
|
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
|
CallingConv::ID CallConv, bool IsVarArg,
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
SDLoc dl, SelectionDAG &DAG,
|
|
SmallVectorImpl<SDValue> &InVals) const;
|
|
|
|
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
|
|
SDLoc DL, SDValue &Chain) const;
|
|
|
|
|
|
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
|
/// for tail call optimization. Targets which want to do tail call
|
|
/// optimization should implement this function.
|
|
bool IsEligibleForTailCallOptimization(SDValue Callee,
|
|
CallingConv::ID CalleeCC,
|
|
bool IsVarArg,
|
|
bool IsCalleeStructRet,
|
|
bool IsCallerStructRet,
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
const SmallVectorImpl<SDValue> &OutVals,
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
SelectionDAG& DAG) const;
|
|
|
|
/// Finds the incoming stack arguments which overlap the given fixed stack
|
|
/// object and incorporates their load into the current chain. This prevents
|
|
/// an upcoming store from clobbering the stack argument before it's used.
|
|
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
|
|
MachineFrameInfo *MFI, int ClobberedFI) const;
|
|
|
|
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
|
|
|
|
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
|
|
|
|
bool IsTailCallConvention(CallingConv::ID CallCC) const;
|
|
|
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
bool isLegalICmpImmediate(int64_t Val) const;
|
|
SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
|
SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const;
|
|
|
|
virtual MachineBasicBlock *
|
|
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
|
|
|
|
MachineBasicBlock *
|
|
emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
|
|
unsigned Size, unsigned Opcode) const;
|
|
|
|
MachineBasicBlock *
|
|
emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
|
|
unsigned Size, unsigned CmpOp,
|
|
A64CC::CondCodes Cond) const;
|
|
MachineBasicBlock *
|
|
emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
|
|
unsigned Size) const;
|
|
|
|
MachineBasicBlock *
|
|
EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
|
|
|
|
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
|
|
RTLIB::Libcall Call) const;
|
|
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
|
|
|
|
SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
|
|
SelectionDAG &DAG) const;
|
|
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
|
|
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
|
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
|
|
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
|
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
|
|
/// expanded to FMAs when this method returns true, otherwise fmuladd is
|
|
/// expanded to fmul + fadd.
|
|
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
|
|
|
|
ConstraintType getConstraintType(const std::string &Constraint) const;
|
|
|
|
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
|
|
const char *Constraint) const;
|
|
void LowerAsmOperandForConstraint(SDValue Op,
|
|
std::string &Constraint,
|
|
std::vector<SDValue> &Ops,
|
|
SelectionDAG &DAG) const;
|
|
|
|
std::pair<unsigned, const TargetRegisterClass*>
|
|
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
|
|
private:
|
|
const InstrItineraryData *Itins;
|
|
|
|
const AArch64Subtarget *getSubtarget() const {
|
|
return &getTargetMachine().getSubtarget<AArch64Subtarget>();
|
|
}
|
|
};
|
|
} // namespace llvm
|
|
|
|
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
|