llvm-6502/lib/Target/ARM64/ARM64FastISel.cpp
Tim Northover 7b837d8c75 ARM64: initial backend import
This adds a second implementation of the AArch64 architecture to LLVM,
accessible in parallel via the "arm64" triple. The plan over the
coming weeks & months is to merge the two into a single backend,
during which time thorough code review should naturally occur.

Everything will be easier with the target in-tree though, hence this
commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8
2014-03-29 10:18:08 +00:00


//===-- ARM64FastISel.cpp - ARM64 FastISel implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARM64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "ARM64.h"
#include "ARM64TargetMachine.h"
#include "ARM64Subtarget.h"
#include "ARM64CallingConv.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
namespace {
class ARM64FastISel : public FastISel {
class Address {
public:
typedef enum {
RegBase,
FrameIndexBase
} BaseKind;
private:
BaseKind Kind;
union {
unsigned Reg;
int FI;
} Base;
int64_t Offset;
public:
Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
};
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const ARM64Subtarget *Subtarget;
LLVMContext *Context;
private:
// Selection routines.
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
bool SelectSelect(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
bool SelectFPToInt(const Instruction *I, bool Signed);
bool SelectIntToFP(const Instruction *I, bool Signed);
bool SelectRem(const Instruction *I, unsigned ISDOpcode);
bool SelectCall(const Instruction *I, const char *IntrMemName);
bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
bool ComputeAddress(const Value *Obj, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
bool UseUnscaled);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
unsigned Flags, bool UseUnscaled);
bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
// Emit functions.
bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
bool UseUnscaled = false);
bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
bool UseUnscaled = false);
unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned ARM64MaterializeGV(const GlobalValue *GV);
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
unsigned &NumBytes);
bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
public:
// Backend specific FastISel code.
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual unsigned TargetMaterializeConstant(const Constant *C);
explicit ARM64FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<ARM64Subtarget>();
Context = &funcInfo.Fn->getContext();
}
virtual bool TargetSelectInstruction(const Instruction *I);
#include "ARM64GenFastISel.inc"
};
} // end anonymous namespace
#include "ARM64GenCallingConv.inc"
CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_ARM64_WebKit_JS;
return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS;
}
unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI))
return 0;
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
.addImm(0)
.addImm(0);
return ResultReg;
}
return 0;
}
unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
const APFloat Val = CFP->getValueAPF();
bool is64bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant; otherwise we have to materialize via the constant pool.
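// (An FMOV immediate is an 8-bit encoding: 1 sign bit, 3 exponent bits and 4
// fraction bits, i.e. values of the form +/-(1 + f/16) * 2^e with f in
// [0, 15] and e in [-3, 4].)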
if (TLI.isFPImmLegal(Val, VT)) {
int Imm;
unsigned Opc;
if (is64bit) {
Imm = ARM64_AM::getFP64Imm(Val);
Opc = ARM64::FMOVDi;
} else {
Imm = ARM64_AM::getFP32Imm(Val);
Opc = ARM64::FMOVSi;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Imm);
return ResultReg;
}
// Materialize via constant pool. MachineConstantPool wants an explicit
// alignment.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
if (Align == 0)
Align = DL.getTypeAllocSize(CFP->getType());
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
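// ADRP yields the 4KiB-aligned page address of the constant-pool entry; the
// low 12 bits are supplied by the :lo12: page offset on the load below.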
unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE);
unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(ADRPReg)
.addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
return ResultReg;
}
unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet. Unfortunately we have
// to peer through any aliases to find out if that rule applies.
const GlobalValue *TLSGV = GV;
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
TLSGV = GA->getAliasedGlobal();
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
if (GVar->isThreadLocal())
return 0;
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(GV->getType(), true);
if (!DestEVT.isSimple())
return 0;
MVT DestVT = DestEVT.getSimpleVT();
unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
if (OpFlags & ARM64II::MO_GOT) {
// ADRP + LDRX
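// Load the global's address from its GOT slot: ADRP forms the page address
// of the slot and the LDR applies the page offset.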
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
ADRPReg)
.addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF |
ARM64II::MO_NC);
} else {
// ADRP + ADDX
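// Direct reference: ADRP forms the page address of the global and the ADD
// fills in the low 12 bits.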
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC)
.addImm(0);
}
return ResultReg;
}
unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
return 0;
MVT VT = CEVT.getSimpleVT();
// FIXME: Handle ConstantInt.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return ARM64MaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return ARM64MaterializeGV(GV);
return 0;
}
// Computes the address to get to an object.
bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
switch (Opcode) {
default:
break;
case Instruction::BitCast: {
// Look through bitcasts.
return ComputeAddress(U->getOperand(0), Addr);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Iterate through the GEP folding the constants into offsets where
// we can.
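// (For example, a GEP of an i32* with a constant index of 4 simply adds 16
// to the running offset.)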
gep_type_iterator GTI = gep_type_begin(U);
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
++i, ++GTI) {
const Value *Op = *i;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (ComputeAddress(U->getOperand(0), Addr))
return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
}
// Try to get this in a register if nothing else has worked.
if (!Addr.isValid())
Addr.setReg(getRegForValue(Obj));
return Addr.isValid();
}
bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple())
return false;
VT = evt.getSimpleVT();
// Handle all legal types, i.e. a register that will directly hold this
// value.
return TLI.isTypeLegal(VT);
}
bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT))
return true;
// If this is a type that can be sign- or zero-extended to a basic operation,
// go ahead and accept it now. For stores, this reflects truncation.
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
}
bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
bool UseUnscaled) {
bool needsLowering = false;
int64_t Offset = Addr.getOffset();
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
if (!UseUnscaled)
// Using scaled, 12-bit, unsigned immediate offsets.
needsLowering = ((Offset & 0xfff) != Offset);
else
// Using unscaled, 9-bit, signed immediate offsets.
needsLowering = (Offset > 255 || Offset < -256);
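// (The scaled form encodes an unsigned 12-bit element offset; the caller has
// already divided the byte offset by the access size. The unscaled LDUR/STUR
// form takes a signed 9-bit byte offset.)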
break;
}
// FIXME: If this is a stack pointer and the offset needs to be simplified
// then put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
return false;
}
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
if (needsLowering) {
uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
UnscaledOffset, MVT::i64);
if (ResultReg == 0)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
}
return true;
}
void ARM64FastISel::AddLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool UseUnscaled) {
int64_t Offset = Addr.getOffset();
// Frame base works a bit differently. Handle it separately.
if (Addr.getKind() == Address::FrameIndexBase) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(FI, Offset), Flags,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.getReg());
MIB.addImm(Offset);
}
}
bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
unsigned Opc;
const TargetRegisterClass *RC;
bool VTIsi1 = false;
int64_t ScaleFactor = 0;
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
// Intentional fall-through.
case MVT::i8:
Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui;
RC = &ARM64::GPR32RegClass;
ScaleFactor = 1;
break;
case MVT::i16:
Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui;
RC = &ARM64::GPR32RegClass;
ScaleFactor = 2;
break;
case MVT::i32:
Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui;
RC = &ARM64::GPR32RegClass;
ScaleFactor = 4;
break;
case MVT::i64:
Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui;
RC = &ARM64::GPR64RegClass;
ScaleFactor = 8;
break;
case MVT::f32:
Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 4;
break;
case MVT::f64:
Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 8;
break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
// Create the base instruction, then add the operands.
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
// Loading an i1 requires special handling.
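// There is no 1-bit load: the value was loaded as a byte, so mask it down to
// bit 0 with an AND of the logical immediate 0x1.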
if (VTIsi1) {
unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ANDReg)
.addReg(ResultReg)
.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
ResultReg = ANDReg;
}
return true;
}
bool ARM64FastISel::SelectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
return false;
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(0), Addr))
return false;
unsigned ResultReg;
if (!EmitLoad(VT, ResultReg, Addr))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
unsigned StrOpc;
bool VTIsi1 = false;
int64_t ScaleFactor = 0;
// Using scaled, 12-bit, unsigned immediate offsets.
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
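// Intentional fall-through.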
case MVT::i8:
StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui;
ScaleFactor = 1;
break;
case MVT::i16:
StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui;
ScaleFactor = 2;
break;
case MVT::i32:
StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui;
ScaleFactor = 4;
break;
case MVT::i64:
StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui;
ScaleFactor = 8;
break;
case MVT::f32:
StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui;
ScaleFactor = 4;
break;
case MVT::f64:
StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui;
ScaleFactor = 8;
break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
// Storing an i1 requires special handling.
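// Mask the source down to bit 0 first; the byte store below writes all eight
// bits.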
if (VTIsi1) {
unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ANDReg)
.addReg(SrcReg)
.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(StrOpc)).addReg(SrcReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
return true;
}
bool ARM64FastISel::SelectStore(const Instruction *I) {
MVT VT;
Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
cast<StoreInst>(I)->isAtomic())
return false;
// Get the value to be stored into a register.
unsigned SrcReg = getRegForValue(Op0);
if (SrcReg == 0)
return false;
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(1), Addr))
return false;
if (!EmitStore(VT, SrcReg, Addr))
return false;
return true;
}
static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
switch (Pred) {
case CmpInst::FCMP_ONE:
case CmpInst::FCMP_UEQ:
default:
// AL is our "false" for now. The other two need more compares.
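// (FCMP_ONE and FCMP_UEQ each map to two condition codes, e.g. ONE is
// "MI or GT" after an FCMP, so they can't be expressed with a single Bcc.)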
return ARM64CC::AL;
case CmpInst::ICMP_EQ:
case CmpInst::FCMP_OEQ:
return ARM64CC::EQ;
case CmpInst::ICMP_SGT:
case CmpInst::FCMP_OGT:
return ARM64CC::GT;
case CmpInst::ICMP_SGE:
case CmpInst::FCMP_OGE:
return ARM64CC::GE;
case CmpInst::ICMP_UGT:
case CmpInst::FCMP_UGT:
return ARM64CC::HI;
case CmpInst::FCMP_OLT:
return ARM64CC::MI;
case CmpInst::ICMP_ULE:
case CmpInst::FCMP_OLE:
return ARM64CC::LS;
case CmpInst::FCMP_ORD:
return ARM64CC::VC;
case CmpInst::FCMP_UNO:
return ARM64CC::VS;
case CmpInst::FCMP_UGE:
return ARM64CC::PL;
case CmpInst::ICMP_SLT:
case CmpInst::FCMP_ULT:
return ARM64CC::LT;
case CmpInst::ICMP_SLE:
case CmpInst::FCMP_ULE:
return ARM64CC::LE;
case CmpInst::FCMP_UNE:
case CmpInst::ICMP_NE:
return ARM64CC::NE;
case CmpInst::ICMP_UGE:
return ARM64CC::CS;
case CmpInst::ICMP_ULT:
return ARM64CC::CC;
}
}
bool ARM64FastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// We may not handle every CC for now.
ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
if (CC == ARM64CC::AL)
return false;
// Emit the cmp.
if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// Emit the branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
FastEmitBranch(FBB, DbgLoc);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
MVT SrcVT;
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
unsigned CondReg = getRegForValue(TI->getOperand(0));
if (CondReg == 0)
return false;
// Issue an extract_subreg to get the lower 32-bits.
if (SrcVT == MVT::i64)
CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
ARM64::sub_32);
unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ANDReg)
.addReg(CondReg)
.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
// Only the flags are needed here, so write the SUBS result to WZR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri),
ARM64::WZR)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
unsigned CC = ARM64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = ARM64CC::EQ;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
FastEmitBranch(FBB, DbgLoc);
return true;
}
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B))
.addMBB(Target);
FuncInfo.MBB->addSuccessor(Target);
return true;
}
unsigned CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
// We've been divorced from our compare! Our block was split, and
// now our compare lives in a predecessor block. We mustn't
// re-compare here, as the children of the compare aren't guaranteed
// live across the block boundary (we *could* check for this).
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
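// SUBS WZR, CondReg, #0 (i.e. CMP CondReg, #0) just sets the flags; the Bcc
// below branches on NE/EQ.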
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri),
ARM64::WZR)
.addReg(CondReg)
.addImm(0)
.addImm(0);
unsigned CC = ARM64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = ARM64CC::EQ;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
FastEmitBranch(FBB, DbgLoc);
return true;
}
bool ARM64FastISel::SelectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
unsigned AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
// Emit the indirect branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR))
.addReg(AddrReg);
// Make sure the CFG is up-to-date.
for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
return true;
}
bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
// Check to see if the 2nd operand is a constant that we can encode directly
// in the compare.
uint64_t Imm;
bool UseImm = false;
bool isNegativeImm = false;
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
SrcVT == MVT::i8 || SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
if (CIVal.isNegative()) {
isNegativeImm = true;
Imm = -Imm;
}
// FIXME: We can handle more immediates using shifts.
UseImm = ((Imm & 0xfff) == Imm);
}
} else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
if (ConstFP->isZero() && !ConstFP->isNegative())
UseImm = true;
}
unsigned ZReg;
unsigned CmpOpc;
bool isICmp = true;
bool needsExt = false;
switch (SrcVT.SimpleTy) {
default:
return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
needsExt = true;
// Intentional fall-through.
case MVT::i32:
ZReg = ARM64::WZR;
if (UseImm)
CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri;
else
CmpOpc = ARM64::SUBSWrr;
break;
case MVT::i64:
ZReg = ARM64::XZR;
if (UseImm)
CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri;
else
CmpOpc = ARM64::SUBSXrr;
break;
case MVT::f32:
isICmp = false;
CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr;
break;
case MVT::f64:
isICmp = false;
CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr;
break;
}
unsigned SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0)
return false;
unsigned SrcReg2;
if (!UseImm) {
SrcReg2 = getRegForValue(Src2Value);
if (SrcReg2 == 0)
return false;
}
// We have an i1, i8, or i16; we need to either zero-extend or sign-extend.
if (needsExt) {
SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
if (SrcReg1 == 0)
return false;
if (!UseImm) {
SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
if (SrcReg2 == 0)
return false;
}
}
if (isICmp) {
if (UseImm)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
.addReg(ZReg)
.addReg(SrcReg1)
.addImm(Imm)
.addImm(0);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
.addReg(ZReg)
.addReg(SrcReg1)
.addReg(SrcReg2);
} else {
if (UseImm)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
.addReg(SrcReg1);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
.addReg(SrcReg1)
.addReg(SrcReg2);
}
return true;
}
bool ARM64FastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
// We may not handle every CC for now.
ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
if (CC == ARM64CC::AL)
return false;
// Emit the cmp.
if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// Now set a register based on the comparison.
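// CSINC Wd, WZR, WZR, invert(CC) is the CSET alias: it produces 1 when CC
// holds and 0 otherwise.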
ARM64CC::CondCode invertedCC = getInvertedCondCode(CC);
unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr),
ResultReg)
.addReg(ARM64::WZR)
.addReg(ARM64::WZR)
.addImm(invertedCC);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectSelect(const Instruction *I) {
const SelectInst *SI = cast<SelectInst>(I);
EVT DestEVT = TLI.getValueType(SI->getType(), true);
if (!DestEVT.isSimple())
return false;
MVT DestVT = DestEVT.getSimpleVT();
if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
DestVT != MVT::f64)
return false;
unsigned CondReg = getRegForValue(SI->getCondition());
if (CondReg == 0)
return false;
unsigned TrueReg = getRegForValue(SI->getTrueValue());
if (TrueReg == 0)
return false;
unsigned FalseReg = getRegForValue(SI->getFalseValue());
if (FalseReg == 0)
return false;
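// Test bit 0 of the condition: mask it with ANDWri and compare the result
// against zero (only the flags are needed, hence WZR); the CSEL below then
// selects TrueReg on NE.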
unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ANDReg)
.addReg(CondReg)
.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri),
ARM64::WZR)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
unsigned SelectOpc;
switch (DestVT.SimpleTy) {
default:
return false;
case MVT::i32:
SelectOpc = ARM64::CSELWr;
break;
case MVT::i64:
SelectOpc = ARM64::CSELXr;
break;
case MVT::f32:
SelectOpc = ARM64::FCSELSrrr;
break;
case MVT::f64:
SelectOpc = ARM64::FCSELDrrr;
break;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
ResultReg)
.addReg(TrueReg)
.addReg(FalseReg)
.addImm(ARM64CC::NE);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
unsigned Op = getRegForValue(V);
if (Op == 0)
return false;
unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
unsigned Op = getRegForValue(V);
if (Op == 0)
return false;
unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
// FPToUI and FPToSI
bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (SrcReg == 0)
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
unsigned Opc;
if (SrcVT == MVT::f64) {
if (Signed)
Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr;
else
Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr;
} else {
if (Signed)
Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr;
else
Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (SrcReg == 0)
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
SrcReg =
EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
if (SrcReg == 0)
return false;
}
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri;
else
Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri;
} else {
if (Signed)
Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri;
else
Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC, unsigned &NumBytes) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
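// The call-frame setup pseudo reserves NumBytes of stack for the outgoing
// arguments; FinishCall emits the matching call-frame destroy pseudo.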
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
.addImm(NumBytes);
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Handle arg promotion: SExt, ZExt, AExt.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
if (Arg == 0)
return false;
ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
// Intentional fall-through.
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
if (Arg == 0)
return false;
ArgVT = DestVT;
break;
}
default:
llvm_unreachable("Unknown arg promotion!");
}
// Now copy/store arg to correct locations.
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// FIXME: Handle custom args.
return false;
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
// Need to store on the stack.
Address Addr;
Addr.setKind(Address::RegBase);
Addr.setReg(ARM64::SP);
Addr.setOffset(VA.getLocMemOffset());
if (!EmitStore(ArgVT, Arg, Addr))
return false;
}
}
return true;
}
bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
.addImm(NumBytes)
.addImm(0);
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
// Only handle a single return value.
if (RVLocs.size() != 1)
return false;
// Copy all of the result registers out of their specified physreg.
MVT CopyVT = RVLocs[0].getValVT();
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(RVLocs[0].getLocReg());
UsedRegs.push_back(RVLocs[0].getLocReg());
// Finally update the result.
UpdateValueMap(I, ResultReg);
}
return true;
}
bool ARM64FastISel::SelectCall(const Instruction *I,
const char *IntrMemName = 0) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
// Don't handle inline asm or intrinsics.
if (isa<InlineAsm>(Callee))
return false;
// Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV)
return false;
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
// Let SDISel handle vararg functions.
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
if (FTy->isVarArg())
return false;
// Handle *simple* calls for now.
MVT RetVT;
Type *RetTy = I->getType();
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT))
return false;
// Set up the argument vectors.
SmallVector<Value *, 8> Args;
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgRegs.reserve(CS.arg_size());
ArgVTs.reserve(CS.arg_size());
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
// If we're lowering a memory intrinsic instead of a regular call, skip the
// last two arguments, which shouldn't be passed to the underlying function.
if (IntrMemName && e - i <= 2)
break;
unsigned Arg = getRegForValue(*i);
if (Arg == 0)
return false;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
MVT ArgVT;
Type *ArgTy = (*i)->getType();
if (!isTypeLegal(ArgTy, ArgVT) &&
!(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
return false;
// We don't handle vector parameters yet.
if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
return false;
unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
Args.push_back(*i);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Handle the arguments now that we've gotten them.
SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
// Issue the call.
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL));
if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
else
MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i], RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
return false;
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
}
bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
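// Inline only small copies: roughly at most four alignment-sized accesses
// when the alignment is known, otherwise fewer than 32 bytes.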
if (Alignment)
return Len / Alignment <= 4;
else
return Len < 32;
}
bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
if (!IsMemCpySmall(Len, Alignment))
return false;
int64_t UnscaledOffset = 0;
Address OrigDest = Dest;
Address OrigSrc = Src;
while (Len) {
MVT VT;
if (!Alignment || Alignment >= 8) {
if (Len >= 8)
VT = MVT::i64;
else if (Len >= 4)
VT = MVT::i32;
else if (Len >= 2)
VT = MVT::i16;
else {
VT = MVT::i8;
}
} else {
// Bound based on alignment.
if (Len >= 4 && Alignment == 4)
VT = MVT::i32;
else if (Len >= 2 && Alignment == 2)
VT = MVT::i16;
else {
VT = MVT::i8;
}
}
bool RV;
unsigned ResultReg;
RV = EmitLoad(VT, ResultReg, Src);
assert(RV == true && "Should be able to handle this load.");
RV = EmitStore(VT, ResultReg, Dest);
assert(RV == true && "Should be able to handle this store.");
(void)RV;
int64_t Size = VT.getSizeInBits() / 8;
Len -= Size;
UnscaledOffset += Size;
// We need to recompute the unscaled offset for each iteration.
Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
}
return true;
}
bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default:
return false;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const MemTransferInst &MTI = cast<MemTransferInst>(I);
// Don't handle volatile.
if (MTI.isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
unsigned Alignment = MTI.getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
if (!ComputeAddress(MTI.getRawDest(), Dest) ||
!ComputeAddress(MTI.getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
if (!MTI.getLength()->getType()->isIntegerTy(64))
return false;
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
return SelectCall(&I, IntrMemName);
}
case Intrinsic::memset: {
const MemSetInst &MSI = cast<MemSetInst>(I);
// Don't handle volatile.
if (MSI.isVolatile())
return false;
if (!MSI.getLength()->getType()->isIntegerTy(64))
return false;
if (MSI.getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK))
.addImm(1);
return true;
}
}
return false;
}
bool ARM64FastISel::SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
if (!FuncInfo.CanLowerReturn)
return false;
if (F.isVarArg())
return false;
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
I->getContext());
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
: RetCC_ARM64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
// Only handle a single return value for now.
if (ValLocs.size() != 1)
return false;
CCValAssign &VA = ValLocs[0];
const Value *RV = Ret->getOperand(0);
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
unsigned SrcReg = Reg + VA.getValNo();
unsigned DestReg = VA.getLocReg();
// Avoid a cross-class copy. This is very unlikely.
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
EVT RVEVT = TLI.getValueType(RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
return false;
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
return false;
bool isZExt = Outs[0].Flags.isZExt();
SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
if (SrcReg == 0)
return false;
}
// Make the copy.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
// Add register to return instruction.
RetRegs.push_back(VA.getLocReg());
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM64::RET_ReallyLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
bool ARM64FastISel::SelectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
EVT SrcEVT = TLI.getValueType(SrcTy, true);
EVT DestEVT = TLI.getValueType(DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
SrcVT != MVT::i8)
return false;
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
DestVT != MVT::i1)
return false;
unsigned SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
// If we're truncating from i64 to a smaller non-legal type then generate an
// AND. Otherwise, we know the high bits are undefined and a truncate doesn't
// generate any code.
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
default:
// Trunc i64 to i32 is handled by the target-independent fast-isel.
return false;
case MVT::i1:
Mask = 0x1;
break;
case MVT::i8:
Mask = 0xff;
break;
case MVT::i16:
Mask = 0xffff;
break;
}
// Issue an extract_subreg to get the lower 32-bits.
unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
ARM64::sub_32);
// Create the AND instruction which performs the actual truncation.
unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ANDReg)
.addReg(Reg32)
.addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32));
SrcReg = ANDReg;
}
UpdateValueMap(I, SrcReg);
return true;
}
unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
DestVT == MVT::i64) &&
"Unexpected value type.");
// Handle i8 and i16 as i32.
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
if (isZExt) {
unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
ResultReg)
.addReg(SrcReg)
.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM64::SUBREG_TO_REG), Reg64)
.addImm(0)
.addReg(ResultReg)
.addImm(ARM64::sub_32);
ResultReg = Reg64;
}
return ResultReg;
} else {
if (DestVT == MVT::i64) {
// FIXME: We're SExt i1 to i64.
return 0;
}
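// SBFM Wd, Wn, #0, #0 copies bit 0 into every bit of Wd, i.e. it
// sign-extends the i1.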
unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri),
ResultReg)
.addReg(SrcReg)
.addImm(0)
.addImm(0);
return ResultReg;
}
}
unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
unsigned Opc;
unsigned Imm = 0;
switch (SrcVT.SimpleTy) {
default:
return 0;
case MVT::i1:
return Emiti1Ext(SrcReg, DestVT, isZExt);
case MVT::i8:
if (DestVT == MVT::i64)
Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
else
Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
Imm = 7;
break;
case MVT::i16:
if (DestVT == MVT::i64)
Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
else
Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
Imm = 15;
break;
case MVT::i32:
assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
Imm = 31;
break;
}
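// UBFM/SBFM Rd, Rn, #0, #Imm extracts bits [Imm:0] and zero-/sign-extends
// the result, i.e. an integer extension from an (Imm+1)-bit value.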
// Handle i8 and i16 as i32.
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg)
.addImm(0)
.addImm(Imm);
return ResultReg;
}
bool ARM64FastISel::SelectIntExt(const Instruction *I) {
// On ARM64, in general, integer casts don't involve legal types; this code
// handles promotable integers. The high bits for a type smaller than
// the register size are assumed to be undefined.
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
bool isZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
EVT SrcEVT = TLI.getValueType(SrcTy, true);
EVT DestEVT = TLI.getValueType(DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0)
return false;
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
EVT DestEVT = TLI.getValueType(I->getType(), true);
if (!DestEVT.isSimple())
return false;
MVT DestVT = DestEVT.getSimpleVT();
if (DestVT != MVT::i64 && DestVT != MVT::i32)
return false;
unsigned DivOpc;
bool is64bit = (DestVT == MVT::i64);
switch (ISDOpcode) {
default:
return false;
case ISD::SREM:
DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr;
break;
case ISD::UREM:
DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr;
break;
}
unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr;
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
.addReg(Src0Reg)
.addReg(Src1Reg);
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
.addReg(QuotReg)
.addReg(Src1Reg)
.addReg(Src0Reg);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::SelectMul(const Instruction *I) {
EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
// Must be simple value type. Don't handle vectors.
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
SrcVT != MVT::i8)
return false;
unsigned Opc;
unsigned ZReg;
switch (SrcVT.SimpleTy) {
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
ZReg = ARM64::WZR;
Opc = ARM64::MADDWrrr;
break;
case MVT::i64:
ZReg = ARM64::XZR;
Opc = ARM64::MADDXrrr;
break;
}
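// MADD Rd, Rn, Rm, Ra computes Ra + Rn * Rm; with the zero register as the
// addend this is a plain multiply (the MUL alias).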
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
// Create the base instruction, then add the operands.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Src0Reg)
.addReg(Src1Reg)
.addReg(ZReg);
UpdateValueMap(I, ResultReg);
return true;
}
bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
case Instruction::Load:
return SelectLoad(I);
case Instruction::Store:
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
case Instruction::IndirectBr:
return SelectIndirectBr(I);
case Instruction::FCmp:
case Instruction::ICmp:
return SelectCmp(I);
case Instruction::Select:
return SelectSelect(I);
case Instruction::FPExt:
return SelectFPExt(I);
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::FPToSI:
return SelectFPToInt(I, /*Signed=*/true);
case Instruction::FPToUI:
return SelectFPToInt(I, /*Signed=*/false);
case Instruction::SIToFP:
return SelectIntToFP(I, /*Signed=*/true);
case Instruction::UIToFP:
return SelectIntToFP(I, /*Signed=*/false);
case Instruction::SRem:
return SelectRem(I, ISD::SREM);
case Instruction::URem:
return SelectRem(I, ISD::UREM);
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
return SelectIntrinsicCall(*II);
return SelectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
case Instruction::Mul:
// FIXME: This really should be handled by the target-independent selector.
return SelectMul(I);
}
return false;
// Silence warnings.
(void)CC_ARM64_DarwinPCS_VarArg;
}
namespace llvm {
llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
return new ARM64FastISel(funcInfo, libInfo);
}
}