llvm-6502/lib/Target/X86/X86ISelDAGToDAG.cpp
Dan Gohman badb2d23d1 When x86 addresses matching exceeds its recursion limit, check to
see if the base register is already occupied before assuming it can be
used. This fixes bogus code generation in the accompanying testcase.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@41049 91177308-0d34-0410-b5e6-96231b3b80d8
2007-08-13 20:03:06 +00:00

1522 lines
52 KiB
C++

//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
#include <set>
using namespace llvm;
STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
namespace {
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// tree.
struct X86ISelAddressMode {
enum {
RegBase,
FrameIndexBase
} BaseType;
struct { // This is really a union, discriminated by BaseType!
SDOperand Reg;
int FrameIndex;
} Base;
bool isRIPRel; // RIP relative?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
Constant *CP;
const char *ES;
int JT;
unsigned Align; // CP alignment.
X86ISelAddressMode()
: BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
namespace {
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
/// ContainsFPCode - Every instruction we select that uses or defines a FP
/// register should set this to true.
bool ContainsFPCode;
/// FastISel - Enable fast(er) instruction selection.
///
bool FastISel;
/// TM - Keep a reference to X86TargetMachine.
///
X86TargetMachine &TM;
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// GlobalBaseReg - keeps track of the virtual register mapped onto global
/// base register.
unsigned GlobalBaseReg;
public:
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering),
ContainsFPCode(false), FastISel(fast), TM(tm),
X86Lowering(*TM.getTargetLowering()),
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
return SelectionDAGISel::runOnFunction(Fn);
}
virtual const char *getPassName() const {
return "X86 DAG->DAG Instruction Selection";
}
/// InstructionSelectBasicBlock - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
private:
SDNode *Select(SDOperand N);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot = true, unsigned Depth = 0);
bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth);
bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp,
SDOperand &InChain, SDOperand &OutChain);
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
void InstructionSelectPreprocess(SelectionDAG &DAG);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
char ConstraintCode,
std::vector<SDOperand> &OutOps,
SelectionDAG &DAG);
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
else if (AM.ES)
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
else
Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
/// i8.
inline SDOperand getI8Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
/// getI16Imm - Return a target constant with the specified value, of type
/// i16.
inline SDOperand getI16Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i16);
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDOperand getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
/// getTruncate - return an SDNode that implements a subreg based truncate
/// of the specified operand to the the specified value type.
SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
#ifndef NDEBUG
unsigned Indent;
#endif
};
}
static SDNode *findFlagUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
SDOperand Op = User->getOperand(i);
if (Op.Val == N && Op.ResNo == FlagResNo)
return User;
}
}
return NULL;
}
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SDNode *Skip, bool &found,
std::set<SDNode *> &Visited) {
if (found ||
Use->getNodeId() > Def->getNodeId() ||
!Visited.insert(Use).second)
return;
for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
SDNode *N = Use->getOperand(i).Val;
if (N == Skip)
continue;
if (N == Def) {
if (Use == ImmedUse)
continue; // Immediate use is ok.
if (Use == Root) {
assert(Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == X86ISD::CMP);
continue;
}
found = true;
break;
}
findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
}
}
/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
SDNode *Skip = NULL) {
std::set<SDNode *> Visited;
bool found = false;
findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
return found;
}
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
if (FastISel) return false;
// If U use can somehow reach N through another path then U can't fold N or
// it will create a cycle. e.g. In the following diagram, U can reach N
// through X. If N is folded into into U, then X is both a predecessor and
// a successor of U.
//
// [ N ]
// ^ ^
// | |
// / \---
// / [X]
// | ^
// [U]--------|
if (isNonImmUse(Root, N, U))
return false;
// If U produces a flag, then it gets (even more) interesting. Since it
// would have been "glued" together with its flag use, we need to check if
// it might reach N:
//
// [ N ]
// ^ ^
// | |
// [U] \--
// ^ [TF]
// | ^
// | |
// \ /
// [FU]
//
// If FU (flag use) indirectly reach N (the load), and U fold N (call it
// NU), then TF is a predecessor of FU and a successor of NU. But since
// NU and FU are flagged together, this effectively creates a cycle.
bool HasFlagUse = false;
MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
while ((VT == MVT::Flag && !Root->use_empty())) {
SDNode *FU = findFlagUse(Root);
if (FU == NULL)
break;
else {
Root = FU;
HasFlagUse = true;
}
VT = Root->getValueType(Root->getNumValues()-1);
}
if (HasFlagUse)
return !isNonImmUse(Root, N, Root, U);
return true;
}
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
SDOperand Store, SDOperand TF) {
std::vector<SDOperand> Ops;
for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
if (Load.Val == TF.Val->getOperand(i).Val)
Ops.push_back(Load.Val->getOperand(0));
else
Ops.push_back(TF.Val->getOperand(i));
DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
Store.getOperand(2), Store.getOperand(3));
}
/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
/// selector to pick more load-modify-store instructions. This is a common
/// case:
///
/// [Load chain]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// / \-
/// / |
/// [TokenFactor] [Op]
/// ^ ^
/// | |
/// \ /
/// \ /
/// [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
/// [Load chain]
/// ^
/// |
/// [TokenFactor]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// | \-
/// | |
/// | [Op]
/// | ^
/// | |
/// \ /
/// \ /
/// [Store]
void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
if (!ISD::isNON_TRUNCStore(I))
continue;
SDOperand Chain = I->getOperand(0);
if (Chain.Val->getOpcode() != ISD::TokenFactor)
continue;
SDOperand N1 = I->getOperand(1);
SDOperand N2 = I->getOperand(2);
if (MVT::isFloatingPoint(N1.getValueType()) ||
MVT::isVector(N1.getValueType()) ||
!N1.hasOneUse())
continue;
bool RModW = false;
SDOperand Load;
unsigned Opcode = N1.Val->getOpcode();
switch (Opcode) {
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::ADDC:
case ISD::ADDE: {
SDOperand N10 = N1.getOperand(0);
SDOperand N11 = N1.getOperand(1);
if (ISD::isNON_EXTLoad(N10.Val))
RModW = true;
else if (ISD::isNON_EXTLoad(N11.Val)) {
RModW = true;
std::swap(N10, N11);
}
RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
Load = N10;
break;
}
case ISD::SUB:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::SUBC:
case ISD::SUBE:
case X86ISD::SHLD:
case X86ISD::SHRD: {
SDOperand N10 = N1.getOperand(0);
if (ISD::isNON_EXTLoad(N10.Val))
RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
Load = N10;
break;
}
}
if (RModW) {
MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
++NumLoadMoved;
}
}
}
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
DEBUG(BB->dump());
MachineFunction::iterator FirstMBB = BB;
if (!FastISel)
InstructionSelectPreprocess(DAG);
// Codegen the basic block.
#ifndef NDEBUG
DOUT << "===== Instruction selection begins:\n";
Indent = 0;
#endif
DAG.setRoot(SelectRoot(DAG.getRoot()));
#ifndef NDEBUG
DOUT << "===== Instruction selection ends:\n";
#endif
DAG.RemoveDeadNodes();
// Emit machine code to BB.
ScheduleAndEmitDAG(DAG);
// If we are emitting FP stack code, scan the basic block to determine if this
// block defines any FP values. If so, put an FP_REG_KILL instruction before
// the terminator of the block.
// Note that FP stack instructions *are* used in SSE code for long double,
// so we do need this check.
bool ContainsFPCode = false;
// Scan all of the machine instructions in these MBBs, checking for FP
// stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
MachineFunction::iterator MBBI = FirstMBB;
do {
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
!ContainsFPCode && I != E; ++I) {
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
const TargetRegisterClass *clas;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
X86::RFP32RegisterClass ||
clas == X86::RFP64RegisterClass ||
clas == X86::RFP80RegisterClass)) {
ContainsFPCode = true;
break;
}
}
}
}
} while (!ContainsFPCode && &*(MBBI++) != BB);
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
// a copy of the input value in this block. In SSE mode, we only care about
// 80-bit values.
if (!ContainsFPCode) {
// Final check, check LLVM BB's that are successors to the LLVM BB
// corresponding to BB for FP PHI nodes.
const BasicBlock *LLVMBB = BB->getBasicBlock();
const PHINode *PN;
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
if (PN->getType()==Type::X86_FP80Ty ||
(!Subtarget->hasSSE2() && PN->getType()->isFloatingPoint())) {
ContainsFPCode = true;
break;
}
}
}
}
// Finally, if we found any FP code, emit the FP_REG_KILL instruction.
if (ContainsFPCode) {
BuildMI(*BB, BB->getFirstTerminator(),
TM.getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
}
}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing())
BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
// Switch the FPU to 64-bit precision mode for better compatibility and speed.
int CWFrameIdx = MFI->CreateStackObject(2, 2);
addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Set the high part to be 64-bit precision.
addFrameReference(BuildMI(BB, TII->get(X86::MOV8mi)),
CWFrameIdx, 1).addImm(2);
// Reload the modified control word now.
addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
}
void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
// If this is main, emit special code for main.
MachineBasicBlock *BB = MF.begin();
if (Fn.hasExternalLinkage() && Fn.getName() == "main")
EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Limit recursion.
if (Depth > 5)
return MatchAddressBase(N, AM, isRoot, Depth);
// RIP relative addressing: %rip + 32-bit displacement!
if (AM.isRIPRel) {
if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
}
return true;
}
int id = N.Val->getNodeId();
bool Available = isSelected(id);
switch (N.getOpcode()) {
default: break;
case ISD::Constant: {
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
break;
}
case X86ISD::Wrapper: {
bool is64Bit = Subtarget->is64Bit();
// Under X86-64 non-small code model, GV (and friends) are 64-bits.
if (is64Bit && TM.getCodeModel() != CodeModel::Small)
break;
if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
break;
// If value is available in a register both base and index components have
// been picked, we can't fit the result available in the register in the
// addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
bool isStatic = TM.getRelocationModel() == Reloc::Static;
SDOperand N0 = N.getOperand(0);
// Mac OS X X86-64 lower 4G address is not available.
bool isAbs32 = !is64Bit ||
(isStatic && Subtarget->hasLow4GUserSpaceAddress());
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
GlobalValue *GV = G->getGlobal();
if (isAbs32 || isRoot) {
AM.GV = GV;
AM.Disp += G->getOffset();
AM.isRIPRel = !isAbs32;
return false;
}
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
if (isAbs32 || isRoot) {
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
AM.isRIPRel = !isAbs32;
return false;
}
} else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
if (isAbs32 || isRoot) {
AM.ES = S->getSymbol();
AM.isRIPRel = !isAbs32;
return false;
}
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
if (isAbs32 || isRoot) {
AM.JT = J->getIndex();
AM.isRIPRel = !isAbs32;
return false;
}
}
}
break;
}
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
case ISD::SHL:
if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
unsigned Val = CN->getValue();
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDOperand ShVal = N.Val->getOperand(0);
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
if (isInt32(Disp))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
return false;
}
}
break;
case ISD::MUL:
// X*[3,5,9] -> X+X*[2,4,8]
if (!Available &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.Val == 0 &&
AM.IndexReg.Val == 0) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
AM.Scale = unsigned(CN->getValue())-1;
SDOperand MulVal = N.Val->getOperand(0);
SDOperand Reg;
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
if (isInt32(Disp))
AM.Disp = Disp;
else
Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
AM.IndexReg = AM.Base.Reg = Reg;
return false;
}
}
break;
case ISD::ADD:
if (!Available) {
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
return false;
AM = Backup;
if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
return false;
AM = Backup;
}
break;
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
if (!Available) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
X86ISelAddressMode Backup = AM;
// Start with the LHS as an addr mode.
if (!MatchAddress(N.getOperand(0), AM, false) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
isInt32(AM.Disp + CN->getSignExtended()) &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
AM.Disp += CN->getValue();
return false;
}
AM = Backup;
}
}
break;
}
return MatchAddressBase(N, AM, isRoot, Depth);
}
/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg.Val == 0) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
}
// Otherwise, we cannot select it.
return true;
}
// Default, generate it as a register.
AM.BaseType = X86ISelAddressMode::RegBase;
AM.Base.Reg = N;
return false;
}
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
AM.Base.Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.Val)
AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp, SDOperand &InChain,
SDOperand &OutChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
InChain = N.getOperand(0).getValue(1);
if (ISD::isNON_EXTLoad(InChain.Val) &&
InChain.getValue(0).hasOneUse() &&
N.hasOneUse() &&
CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
LoadSDNode *LD = cast<LoadSDNode>(InChain);
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
return false;
OutChain = LD->getChain();
return true;
}
}
// Also handle the case where we explicitly require zeros in the top
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(1).Val->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
N.getOperand(1).getOperand(0).hasOneUse()) {
// Check to see if the BUILD_VECTOR is building a zero vector.
SDOperand BV = N.getOperand(0);
for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
if (!isZeroNode(BV.getOperand(i)) &&
BV.getOperand(i).getOpcode() != ISD::UNDEF)
return false; // Not a zero/undef vector.
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
// from the LHS.
unsigned VecWidth = BV.getNumOperands();
SDOperand ShufMask = N.getOperand(2);
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
if (C->getValue() == VecWidth) {
for (unsigned i = 1; i != VecWidth; ++i) {
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
// ok.
} else {
ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
if (C->getValue() >= VecWidth) return false;
}
}
}
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
return false;
OutChain = LD->getChain();
InChain = SDOperand(LD, 1);
return true;
}
}
return false;
}
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
AM.Base.Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
AM.IndexReg = CurDAG->getRegister(0, VT);
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
// a simple shift.
if (AM.Scale > 1)
Complexity++;
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
// to a LEA. This is determined with some expermentation but is by no means
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
// For X86-64, we should always use lea to materialize RIP relative
// addresses.
if (Subtarget->is64Bit())
Complexity = 4;
else
Complexity += 2;
}
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
if (Complexity > 2) {
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
return false;
}
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
if (ISD::isNON_EXTLoad(N.Val) &&
N.hasOneUse() &&
CanBeFoldedBy(N.Val, P.Val, P.Val))
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
return false;
}
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = BB->getParent()->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
SSARegMap *RegMap = BB->getParent()->getSSARegMap();
unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
const TargetInstrInfo *TII = TM.getInstrInfo();
BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
if (TM.getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
addReg(PC).
addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
} else {
GlobalBaseReg = PC;
}
}
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
static SDNode *FindCallStartFromCall(SDNode *Node) {
if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
assert(Node->getOperand(0).getValueType() == MVT::Other &&
"Node doesn't have a token chain argument!");
return FindCallStartFromCall(Node->getOperand(0).Val);
}
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
SDOperand SRIdx;
switch (VT) {
case MVT::i8:
SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
// Ensure that the source register has an 8-bit subreg on 32-bit targets
if (!Subtarget->is64Bit()) {
unsigned Opc;
MVT::ValueType VT;
switch (N0.getValueType()) {
default: assert(0 && "Unknown truncate!");
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
break;
}
N0 =
SDOperand(CurDAG->getTargetNode(Opc, VT, N0), 0);
}
break;
case MVT::i16:
SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
case MVT::i32:
SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
break;
default: assert(0 && "Unknown truncate!");
}
return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
VT,
N0, SRIdx);
}
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
SDNode *Node = N.Val;
MVT::ValueType NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
#ifndef NDEBUG
DOUT << std::string(Indent, ' ') << "Selecting: ";
DEBUG(Node->dump(CurDAG));
DOUT << "\n";
Indent += 2;
#endif
if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "== ";
DEBUG(Node->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return NULL; // Already selected.
}
switch (Opcode) {
default: break;
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
case ISD::ADD: {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
// In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
if (N.Val->getValueType(0) == PtrVT &&
N0.getOpcode() == X86ISD::Wrapper &&
N1.getOpcode() == ISD::Constant) {
unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
SDOperand C(0, 0);
// TODO: handle ExternalSymbolSDNode.
if (GlobalAddressSDNode *G =
dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
G->getOffset() + Offset);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
CP->getAlignment(),
CP->getOffset()+Offset);
}
if (C.Val) {
if (Subtarget->is64Bit()) {
SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
CurDAG->getRegister(0, PtrVT), C };
return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
} else
return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
}
}
// Other cases are handled by auto-generated code.
break;
}
case ISD::MUL: {
if (NVT == MVT::i8) {
SDOperand N0 = Node->getOperand(0);
SDOperand N1 = Node->getOperand(1);
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
if (!foldedLoad) {
foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
if (foldedLoad)
std::swap(N0, N1);
}
SDNode *ResNode;
if (foldedLoad) {
SDOperand Chain = N1.getOperand(0);
AddToISelQueue(N0);
AddToISelQueue(Chain);
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
SDOperand InFlag(0, 0);
Chain = CurDAG->getCopyToReg(Chain, X86::AL, N0, InFlag);
InFlag = Chain.getValue(1);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
ResNode = CurDAG->getTargetNode(X86::MUL8m, MVT::i8, MVT::i8,
MVT::Other, Ops, 6);
ReplaceUses(N1.getValue(1), SDOperand(ResNode, 2));
} else {
SDOperand Chain = CurDAG->getEntryNode();
AddToISelQueue(N0);
AddToISelQueue(N1);
SDOperand InFlag(0, 0);
InFlag = CurDAG->getCopyToReg(Chain, X86::AL, N0, InFlag).getValue(1);
ResNode = CurDAG->getTargetNode(X86::MUL8r, MVT::i8, MVT::i8,
N1, InFlag);
}
ReplaceUses(N.getValue(0), SDOperand(ResNode, 0));
return NULL;
}
break;
}
case ISD::MULHU:
case ISD::MULHS: {
if (Opcode == ISD::MULHU)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
else
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
unsigned LoReg, HiReg;
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
SDOperand N0 = Node->getOperand(0);
SDOperand N1 = Node->getOperand(1);
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
// MULHU and MULHS are commmutative
if (!foldedLoad) {
foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
if (foldedLoad)
std::swap(N0, N1);
}
SDOperand Chain;
if (foldedLoad) {
Chain = N1.getOperand(0);
AddToISelQueue(Chain);
} else
Chain = CurDAG->getEntryNode();
SDOperand InFlag(0, 0);
AddToISelQueue(N0);
Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
N0, InFlag);
InFlag = Chain.getValue(1);
if (foldedLoad) {
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
} else {
AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
SDOperand Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
Chain = Result.getValue(1);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
}
ReplaceUses(N.getValue(0), Result);
if (foldedLoad)
ReplaceUses(N1.getValue(1), Result.getValue(1));
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return NULL;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
else
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
unsigned LoReg, HiReg;
unsigned ClrOpcode, SExtOpcode;
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; HiReg = X86::AH;
ClrOpcode = 0;
SExtOpcode = X86::CBW;
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
ClrOpcode = X86::MOV16r0;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; HiReg = X86::EDX;
ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; HiReg = X86::RDX;
ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
SDOperand N0 = Node->getOperand(0);
SDOperand N1 = Node->getOperand(1);
SDOperand InFlag(0, 0);
if (NVT == MVT::i8 && !isSigned) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
AddToISelQueue(N0.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
Move =
SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
Ops, 5), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
AddToISelQueue(N0);
Move =
SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
Chain = CurDAG->getEntryNode();
}
Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, InFlag);
InFlag = Chain.getValue(1);
} else {
AddToISelQueue(N0);
InFlag =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg, N0,
InFlag).getValue(1);
if (isSigned) {
// Sign extend the low part into the high part.
InFlag =
SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
} else {
// Zero out the high part, effectively zero extending the input.
SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, ClrNode,
InFlag).getValue(1);
}
}
SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Chain;
bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
if (foldedLoad) {
AddToISelQueue(N1.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
} else {
AddToISelQueue(N1);
Chain = CurDAG->getEntryNode();
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
unsigned Reg = isDiv ? LoReg : HiReg;
SDOperand Result;
if (Reg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
Chain = Result.getValue(1);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag);
Chain = Result.getValue(1);
}
ReplaceUses(N.getValue(0), Result);
if (foldedLoad)
ReplaceUses(N1.getValue(1), Chain);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return NULL;
}
case ISD::ANY_EXTEND: {
SDOperand N0 = Node->getOperand(0);
AddToISelQueue(N0);
if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
SDOperand SRIdx;
switch(N0.getValueType()) {
case MVT::i32:
SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
break;
case MVT::i16:
SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
case MVT::i8:
if (Subtarget->is64Bit())
SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
break;
default: assert(0 && "Unknown any_extend!");
}
if (SRIdx.Val) {
SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG, NVT, N0, SRIdx);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
} // Otherwise let generated ISel handle it.
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDOperand N0 = Node->getOperand(0);
AddToISelQueue(N0);
MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
unsigned Opc;
switch (NVT) {
case MVT::i16:
if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
else assert(0 && "Unknown sign_extend_inreg!");
break;
case MVT::i32:
switch (SVT) {
case MVT::i8: Opc = X86::MOVSX32rr8; break;
case MVT::i16: Opc = X86::MOVSX32rr16; break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
break;
case MVT::i64:
switch (SVT) {
case MVT::i8: Opc = X86::MOVSX64rr8; break;
case MVT::i16: Opc = X86::MOVSX64rr16; break;
case MVT::i32: Opc = X86::MOVSX64rr32; break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(TruncOp.Val->dump(CurDAG));
DOUT << "\n";
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
break;
}
case ISD::TRUNCATE: {
SDOperand Input = Node->getOperand(0);
AddToISelQueue(Node->getOperand(0));
SDNode *ResNode = getTruncate(Input, NVT);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
break;
}
}
SDNode *ResNode = SelectCode(N);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
if (ResNode == NULL || ResNode == N.Val)
DEBUG(N.Val->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
}
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
SDOperand Op0, Op1, Op2, Op3;
switch (ConstraintCode) {
case 'o': // offsetable ??
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
return true;
break;
}
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
OutOps.push_back(Op3);
AddToISelQueue(Op0);
AddToISelQueue(Op1);
AddToISelQueue(Op2);
AddToISelQueue(Op3);
return false;
}
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
return new X86DAGToDAGISel(TM, Fast);
}