llvm-6502/lib/Target/X86/X86ISelDAGToDAG.cpp

1634 lines
58 KiB
C++
Raw Normal View History

//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
#include <set>
using namespace llvm;
STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
namespace {
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// tree.
struct X86ISelAddressMode {
enum {
RegBase,
FrameIndexBase
} BaseType;
struct { // This is really a union, discriminated by BaseType!
SDOperand Reg;
int FrameIndex;
} Base;
bool isRIPRel; // RIP as base?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
Constant *CP;
const char *ES;
int JT;
unsigned Align; // CP alignment.
X86ISelAddressMode()
: BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
namespace {
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
/// ContainsFPCode - Every instruction we select that uses or defines a FP
/// register should set this to true.
bool ContainsFPCode;
/// FastISel - Enable fast(er) instruction selection.
///
bool FastISel;
/// TM - Keep a reference to X86TargetMachine.
///
X86TargetMachine &TM;
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// GlobalBaseReg - keeps track of the virtual register mapped onto global
/// base register.
unsigned GlobalBaseReg;
public:
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering),
ContainsFPCode(false), FastISel(fast), TM(tm),
X86Lowering(*TM.getTargetLowering()),
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
return SelectionDAGISel::runOnFunction(Fn);
}
virtual const char *getPassName() const {
return "X86 DAG->DAG Instruction Selection";
}
/// InstructionSelectBasicBlock - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
private:
SDNode *Select(SDOperand N);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot = true, unsigned Depth = 0);
bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth);
bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp,
SDOperand &InChain, SDOperand &OutChain);
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
Significantly simplify and improve handling of FP function results on x86-32. This case returns the value in ST(0) and then has to convert it to an SSE register. This causes significant codegen ugliness in some cases. For example in the trivial fp-stack-direct-ret.ll testcase we used to generate: _bar: subl $28, %esp call L_foo$stub fstpl 16(%esp) movsd 16(%esp), %xmm0 movsd %xmm0, 8(%esp) fldl 8(%esp) addl $28, %esp ret because we move the result of foo() into an XMM register, then have to move it back for the return of bar. Instead of hacking ever-more special cases into the call result lowering code we take a much simpler approach: on x86-32, fp return is modeled as always returning into an f80 register which is then truncated to f32 or f64 as needed. Similarly for a result, we model it as an extension to f80 + return. This exposes the truncate and extensions to the dag combiner, allowing target independent code to hack on them, eliminating them in this case. This gives us this code for the example above: _bar: subl $12, %esp call L_foo$stub addl $12, %esp ret The nasty aspect of this is that these conversions are not legal, but we want the second pass of dag combiner (post-legalize) to be able to hack on them. To handle this, we lie to legalize and say they are legal, then custom expand them on entry to the isel pass (PreprocessForFPConvert). This is gross, but less gross than the code it is replacing :) This also allows us to generate better code in several other cases. For example on fp-stack-ret-conv.ll, we now generate: _test: subl $12, %esp call L_foo$stub fstps 8(%esp) movl 16(%esp), %eax cvtss2sd 8(%esp), %xmm0 movsd %xmm0, (%eax) addl $12, %esp ret where before we produced (incidentally, the old bad code is identical to what gcc produces): _test: subl $12, %esp call L_foo$stub fstpl (%esp) cvtsd2ss (%esp), %xmm0 cvtss2sd %xmm0, %xmm0 movl 16(%esp), %eax movsd %xmm0, (%eax) addl $12, %esp ret Note that we generate slightly worse code on pr1505b.ll due to a scheduling deficiency that is unrelated to this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
void PreprocessForRMW(SelectionDAG &DAG);
void PreprocessForFPConvert(SelectionDAG &DAG);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
char ConstraintCode,
std::vector<SDOperand> &OutOps,
SelectionDAG &DAG);
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
else if (AM.ES)
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
else
Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
/// i8.
inline SDOperand getI8Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
/// getI16Imm - Return a target constant with the specified value, of type
/// i16.
inline SDOperand getI16Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i16);
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDOperand getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
/// getTruncate - return an SDNode that implements a subreg based truncate
/// of the specified operand to the the specified value type.
SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
#ifndef NDEBUG
unsigned Indent;
#endif
};
}
static SDNode *findFlagUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
SDOperand Op = User->getOperand(i);
if (Op.Val == N && Op.ResNo == FlagResNo)
return User;
}
}
return NULL;
}
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SDNode *Skip, bool &found,
std::set<SDNode *> &Visited) {
if (found ||
Use->getNodeId() > Def->getNodeId() ||
!Visited.insert(Use).second)
return;
for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
SDNode *N = Use->getOperand(i).Val;
if (N == Skip)
continue;
if (N == Def) {
if (Use == ImmedUse)
continue; // Immediate use is ok.
if (Use == Root) {
assert(Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == X86ISD::CMP);
continue;
}
found = true;
break;
}
findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
}
}
/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
SDNode *Skip = NULL) {
std::set<SDNode *> Visited;
bool found = false;
findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
return found;
}
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
if (FastISel) return false;
// If U use can somehow reach N through another path then U can't fold N or
// it will create a cycle. e.g. In the following diagram, U can reach N
// through X. If N is folded into into U, then X is both a predecessor and
// a successor of U.
//
// [ N ]
// ^ ^
// | |
// / \---
// / [X]
// | ^
// [U]--------|
if (isNonImmUse(Root, N, U))
return false;
// If U produces a flag, then it gets (even more) interesting. Since it
// would have been "glued" together with its flag use, we need to check if
// it might reach N:
//
// [ N ]
// ^ ^
// | |
// [U] \--
// ^ [TF]
// | ^
// | |
// \ /
// [FU]
//
// If FU (flag use) indirectly reach N (the load), and U fold N (call it
// NU), then TF is a predecessor of FU and a successor of NU. But since
// NU and FU are flagged together, this effectively creates a cycle.
bool HasFlagUse = false;
MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
while ((VT == MVT::Flag && !Root->use_empty())) {
SDNode *FU = findFlagUse(Root);
if (FU == NULL)
break;
else {
Root = FU;
HasFlagUse = true;
}
VT = Root->getValueType(Root->getNumValues()-1);
}
if (HasFlagUse)
return !isNonImmUse(Root, N, Root, U);
return true;
}
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
SDOperand Store, SDOperand TF) {
std::vector<SDOperand> Ops;
for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
if (Load.Val == TF.Val->getOperand(i).Val)
Ops.push_back(Load.Val->getOperand(0));
else
Ops.push_back(TF.Val->getOperand(i));
DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
Store.getOperand(2), Store.getOperand(3));
}
Significantly simplify and improve handling of FP function results on x86-32. This case returns the value in ST(0) and then has to convert it to an SSE register. This causes significant codegen ugliness in some cases. For example in the trivial fp-stack-direct-ret.ll testcase we used to generate: _bar: subl $28, %esp call L_foo$stub fstpl 16(%esp) movsd 16(%esp), %xmm0 movsd %xmm0, 8(%esp) fldl 8(%esp) addl $28, %esp ret because we move the result of foo() into an XMM register, then have to move it back for the return of bar. Instead of hacking ever-more special cases into the call result lowering code we take a much simpler approach: on x86-32, fp return is modeled as always returning into an f80 register which is then truncated to f32 or f64 as needed. Similarly for a result, we model it as an extension to f80 + return. This exposes the truncate and extensions to the dag combiner, allowing target independent code to hack on them, eliminating them in this case. This gives us this code for the example above: _bar: subl $12, %esp call L_foo$stub addl $12, %esp ret The nasty aspect of this is that these conversions are not legal, but we want the second pass of dag combiner (post-legalize) to be able to hack on them. To handle this, we lie to legalize and say they are legal, then custom expand them on entry to the isel pass (PreprocessForFPConvert). This is gross, but less gross than the code it is replacing :) This also allows us to generate better code in several other cases. For example on fp-stack-ret-conv.ll, we now generate: _test: subl $12, %esp call L_foo$stub fstps 8(%esp) movl 16(%esp), %eax cvtss2sd 8(%esp), %xmm0 movsd %xmm0, (%eax) addl $12, %esp ret where before we produced (incidentally, the old bad code is identical to what gcc produces): _test: subl $12, %esp call L_foo$stub fstpl (%esp) cvtsd2ss (%esp), %xmm0 cvtss2sd %xmm0, %xmm0 movl 16(%esp), %eax movsd %xmm0, (%eax) addl $12, %esp ret Note that we generate slightly worse code on pr1505b.ll due to a scheduling deficiency that is unrelated to this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -fast mode (aka -O0).
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
/// [Load chain]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// / \-
/// / |
/// [TokenFactor] [Op]
/// ^ ^
/// | |
/// \ /
/// \ /
/// [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
/// [Load chain]
/// ^
/// |
/// [TokenFactor]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// | \-
/// | |
/// | [Op]
/// | ^
/// | |
/// \ /
/// \ /
/// [Store]
Significantly simplify and improve handling of FP function results on x86-32. This case returns the value in ST(0) and then has to convert it to an SSE register. This causes significant codegen ugliness in some cases. For example in the trivial fp-stack-direct-ret.ll testcase we used to generate: _bar: subl $28, %esp call L_foo$stub fstpl 16(%esp) movsd 16(%esp), %xmm0 movsd %xmm0, 8(%esp) fldl 8(%esp) addl $28, %esp ret because we move the result of foo() into an XMM register, then have to move it back for the return of bar. Instead of hacking ever-more special cases into the call result lowering code we take a much simpler approach: on x86-32, fp return is modeled as always returning into an f80 register which is then truncated to f32 or f64 as needed. Similarly for a result, we model it as an extension to f80 + return. This exposes the truncate and extensions to the dag combiner, allowing target independent code to hack on them, eliminating them in this case. This gives us this code for the example above: _bar: subl $12, %esp call L_foo$stub addl $12, %esp ret The nasty aspect of this is that these conversions are not legal, but we want the second pass of dag combiner (post-legalize) to be able to hack on them. To handle this, we lie to legalize and say they are legal, then custom expand them on entry to the isel pass (PreprocessForFPConvert). This is gross, but less gross than the code it is replacing :) This also allows us to generate better code in several other cases. For example on fp-stack-ret-conv.ll, we now generate: _test: subl $12, %esp call L_foo$stub fstps 8(%esp) movl 16(%esp), %eax cvtss2sd 8(%esp), %xmm0 movsd %xmm0, (%eax) addl $12, %esp ret where before we produced (incidentally, the old bad code is identical to what gcc produces): _test: subl $12, %esp call L_foo$stub fstpl (%esp) cvtsd2ss (%esp), %xmm0 cvtss2sd %xmm0, %xmm0 movl 16(%esp), %eax movsd %xmm0, (%eax) addl $12, %esp ret Note that we generate slightly worse code on pr1505b.ll due to a scheduling deficiency that is unrelated to this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
if (!ISD::isNON_TRUNCStore(I))
continue;
SDOperand Chain = I->getOperand(0);
if (Chain.Val->getOpcode() != ISD::TokenFactor)
continue;
SDOperand N1 = I->getOperand(1);
SDOperand N2 = I->getOperand(2);
if (MVT::isFloatingPoint(N1.getValueType()) ||
MVT::isVector(N1.getValueType()) ||
!N1.hasOneUse())
continue;
bool RModW = false;
SDOperand Load;
unsigned Opcode = N1.Val->getOpcode();
switch (Opcode) {
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::ADDC:
case ISD::ADDE: {
SDOperand N10 = N1.getOperand(0);
SDOperand N11 = N1.getOperand(1);
if (ISD::isNON_EXTLoad(N10.Val))
RModW = true;
else if (ISD::isNON_EXTLoad(N11.Val)) {
RModW = true;
std::swap(N10, N11);
}
RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
Load = N10;
break;
}
case ISD::SUB:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::SUBC:
case ISD::SUBE:
case X86ISD::SHLD:
case X86ISD::SHRD: {
SDOperand N10 = N1.getOperand(0);
if (ISD::isNON_EXTLoad(N10.Val))
RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
Load = N10;
break;
}
}
if (RModW) {
MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
++NumLoadMoved;
}
}
}
Significantly simplify and improve handling of FP function results on x86-32. This case returns the value in ST(0) and then has to convert it to an SSE register. This causes significant codegen ugliness in some cases. For example in the trivial fp-stack-direct-ret.ll testcase we used to generate: _bar: subl $28, %esp call L_foo$stub fstpl 16(%esp) movsd 16(%esp), %xmm0 movsd %xmm0, 8(%esp) fldl 8(%esp) addl $28, %esp ret because we move the result of foo() into an XMM register, then have to move it back for the return of bar. Instead of hacking ever-more special cases into the call result lowering code we take a much simpler approach: on x86-32, fp return is modeled as always returning into an f80 register which is then truncated to f32 or f64 as needed. Similarly for a result, we model it as an extension to f80 + return. This exposes the truncate and extensions to the dag combiner, allowing target independent code to hack on them, eliminating them in this case. This gives us this code for the example above: _bar: subl $12, %esp call L_foo$stub addl $12, %esp ret The nasty aspect of this is that these conversions are not legal, but we want the second pass of dag combiner (post-legalize) to be able to hack on them. To handle this, we lie to legalize and say they are legal, then custom expand them on entry to the isel pass (PreprocessForFPConvert). This is gross, but less gross than the code it is replacing :) This also allows us to generate better code in several other cases. For example on fp-stack-ret-conv.ll, we now generate: _test: subl $12, %esp call L_foo$stub fstps 8(%esp) movl 16(%esp), %eax cvtss2sd 8(%esp), %xmm0 movsd %xmm0, (%eax) addl $12, %esp ret where before we produced (incidentally, the old bad code is identical to what gcc produces): _test: subl $12, %esp call L_foo$stub fstpl (%esp) cvtsd2ss (%esp), %xmm0 cvtss2sd %xmm0, %xmm0 movl 16(%esp), %eax movsd %xmm0, (%eax) addl $12, %esp ret Note that we generate slightly worse code on pr1505b.ll due to a scheduling deficiency that is unrelated to this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack. This is a
/// gross hack. We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass. We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization. As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
// If the source and destination are SSE registers, then this is a legal
// conversion that should not be lowered.
MVT::ValueType SrcVT = N->getOperand(0).getValueType();
MVT::ValueType DstVT = N->getValueType(0);
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
continue;
// If this is an FPStack extension (but not a truncation), it is a noop.
if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
continue;
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
MVT::ValueType MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
MemVT = SrcIsSSE ? SrcVT : DstVT;
SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
// FIXME: optimize the case where the src/dest is a load or store?
SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
MemTmp, NULL, 0, MemVT);
SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
NULL, 0, MemVT);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
// anything below the conversion could be folded into other existing nodes.
// To avoid invalidating 'I', back it up to the convert node.
--I;
DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
DAG.DeleteNode(N);
}
}
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
DEBUG(BB->dump());
MachineFunction::iterator FirstMBB = BB;
if (!FastISel)
Significantly simplify and improve handling of FP function results on x86-32. This case returns the value in ST(0) and then has to convert it to an SSE register. This causes significant codegen ugliness in some cases. For example in the trivial fp-stack-direct-ret.ll testcase we used to generate: _bar: subl $28, %esp call L_foo$stub fstpl 16(%esp) movsd 16(%esp), %xmm0 movsd %xmm0, 8(%esp) fldl 8(%esp) addl $28, %esp ret because we move the result of foo() into an XMM register, then have to move it back for the return of bar. Instead of hacking ever-more special cases into the call result lowering code we take a much simpler approach: on x86-32, fp return is modeled as always returning into an f80 register which is then truncated to f32 or f64 as needed. Similarly for a result, we model it as an extension to f80 + return. This exposes the truncate and extensions to the dag combiner, allowing target independent code to hack on them, eliminating them in this case. This gives us this code for the example above: _bar: subl $12, %esp call L_foo$stub addl $12, %esp ret The nasty aspect of this is that these conversions are not legal, but we want the second pass of dag combiner (post-legalize) to be able to hack on them. To handle this, we lie to legalize and say they are legal, then custom expand them on entry to the isel pass (PreprocessForFPConvert). This is gross, but less gross than the code it is replacing :) This also allows us to generate better code in several other cases. For example on fp-stack-ret-conv.ll, we now generate: _test: subl $12, %esp call L_foo$stub fstps 8(%esp) movl 16(%esp), %eax cvtss2sd 8(%esp), %xmm0 movsd %xmm0, (%eax) addl $12, %esp ret where before we produced (incidentally, the old bad code is identical to what gcc produces): _test: subl $12, %esp call L_foo$stub fstpl (%esp) cvtsd2ss (%esp), %xmm0 cvtss2sd %xmm0, %xmm0 movl 16(%esp), %eax movsd %xmm0, (%eax) addl $12, %esp ret Note that we generate slightly worse code on pr1505b.ll due to a scheduling deficiency that is unrelated to this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46307 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
PreprocessForRMW(DAG);
// FIXME: This should only happen when not -fast.
PreprocessForFPConvert(DAG);
// Codegen the basic block.
#ifndef NDEBUG
DOUT << "===== Instruction selection begins:\n";
Indent = 0;
#endif
DAG.setRoot(SelectRoot(DAG.getRoot()));
#ifndef NDEBUG
DOUT << "===== Instruction selection ends:\n";
#endif
DAG.RemoveDeadNodes();
// Emit machine code to BB.
ScheduleAndEmitDAG(DAG);
// If we are emitting FP stack code, scan the basic block to determine if this
// block defines any FP values. If so, put an FP_REG_KILL instruction before
// the terminator of the block.
// Note that FP stack instructions are used in all modes for long double,
// so we always need to do this check.
// Also note that it's possible for an FP stack register to be live across
// an instruction that produces multiple basic blocks (SSE CMOV) so we
// must check all the generated basic blocks.
// Scan all of the machine instructions in these MBBs, checking for FP
// stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
MachineFunction::iterator MBBI = FirstMBB;
do {
bool ContainsFPCode = false;
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
!ContainsFPCode && I != E; ++I) {
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
const TargetRegisterClass *clas;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
X86::RFP32RegisterClass ||
clas == X86::RFP64RegisterClass ||
clas == X86::RFP80RegisterClass)) {
ContainsFPCode = true;
break;
}
}
}
}
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
// a copy of the input value in this block. In SSE mode, we only care about
// 80-bit values.
if (!ContainsFPCode) {
// Final check, check LLVM BB's that are successors to the LLVM BB
// corresponding to BB for FP PHI nodes.
const BasicBlock *LLVMBB = BB->getBasicBlock();
const PHINode *PN;
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
if (PN->getType()==Type::X86_FP80Ty ||
(!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
(!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
ContainsFPCode = true;
break;
}
}
}
}
// Finally, if we found any FP code, emit the FP_REG_KILL instruction.
if (ContainsFPCode) {
BuildMI(*MBBI, MBBI->getFirstTerminator(),
TM.getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
}
} while (&*(MBBI++) != BB);
}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing())
BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}
void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
// If this is main, emit special code for main.
MachineBasicBlock *BB = MF.begin();
if (Fn.hasExternalLinkage() && Fn.getName() == "main")
EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Limit recursion.
if (Depth > 5)
return MatchAddressBase(N, AM, isRoot, Depth);
// RIP relative addressing: %rip + 32-bit displacement!
if (AM.isRIPRel) {
if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
}
return true;
}
int id = N.Val->getNodeId();
bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.
switch (N.getOpcode()) {
default: break;
case ISD::Constant: {
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
break;
}
case X86ISD::Wrapper: {
bool is64Bit = Subtarget->is64Bit();
// Under X86-64 non-small code model, GV (and friends) are 64-bits.
// Also, base and index reg must be 0 in order to use rip as base.
if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
AM.Base.Reg.Val || AM.IndexReg.Val))
break;
if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
break;
// If value is available in a register both base and index components have
// been picked, we can't fit the result available in the register in the
// addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
SDOperand N0 = N.getOperand(0);
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
GlobalValue *GV = G->getGlobal();
AM.GV = GV;
AM.Disp += G->getOffset();
AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
Subtarget->isPICStyleRIPRel();
return false;
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
Subtarget->isPICStyleRIPRel();
return false;
} else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
Subtarget->isPICStyleRIPRel();
return false;
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
Subtarget->isPICStyleRIPRel();
return false;
}
}
break;
}
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
case ISD::SHL:
if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
break;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
unsigned Val = CN->getValue();
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDOperand ShVal = N.Val->getOperand(0);
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
if (isInt32(Disp))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
return false;
}
break;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
// A mul_lohi where we need the low part can be folded as a plain multiply.
if (N.ResNo != 0) break;
// FALL THROUGH
case ISD::MUL:
// X*[3,5,9] -> X+X*[2,4,8]
if (!AlreadySelected &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.Val == 0 &&
AM.IndexReg.Val == 0 &&
!AM.isRIPRel) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
AM.Scale = unsigned(CN->getValue())-1;
SDOperand MulVal = N.Val->getOperand(0);
SDOperand Reg;
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
if (isInt32(Disp))
AM.Disp = Disp;
else
Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
AM.IndexReg = AM.Base.Reg = Reg;
return false;
}
}
break;
case ISD::ADD:
if (!AlreadySelected) {
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
return false;
AM = Backup;
if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
return false;
AM = Backup;
}
break;
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
if (AlreadySelected) break;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
X86ISelAddressMode Backup = AM;
// Start with the LHS as an addr mode.
if (!MatchAddress(N.getOperand(0), AM, false) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
isInt32(AM.Disp + CN->getSignExtended()) &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
AM.Disp += CN->getValue();
return false;
}
AM = Backup;
}
break;
case ISD::AND: {
// Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode.
if (AlreadySelected) break;
SDOperand Shift = N.getOperand(0);
if (Shift.getOpcode() != ISD::SHL) break;
// Scale must not be used already.
if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;
// Not when RIP is used as the base.
if (AM.isRIPRel) break;
ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C1 || !C2) break;
// Not likely to be profitable if either the AND or SHIFT node has more
// than one use (unless all uses are for address computation). Besides,
// isel mechanism requires their node ids to be reused.
if (!N.hasOneUse() || !Shift.hasOneUse())
break;
// Verify that the shift amount is something we can fold.
unsigned ShiftCst = C1->getValue();
if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
break;
// Get the new AND mask, this folds to a constant.
SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
SDOperand(C2, 0), SDOperand(C1, 0));
SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
Shift.getOperand(0), NewANDMask);
NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
NewAND.Val->setNodeId(N.Val->getNodeId());
AM.Scale = 1 << ShiftCst;
AM.IndexReg = NewAND;
return false;
}
}
return MatchAddressBase(N, AM, isRoot, Depth);
}
/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
}
// Otherwise, we cannot select it.
return true;
}
// Default, generate it as a register.
AM.BaseType = X86ISelAddressMode::RegBase;
AM.Base.Reg = N;
return false;
}
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
AM.Base.Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.Val)
AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp, SDOperand &InChain,
SDOperand &OutChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
InChain = N.getOperand(0).getValue(1);
if (ISD::isNON_EXTLoad(InChain.Val) &&
InChain.getValue(0).hasOneUse() &&
N.hasOneUse() &&
CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
LoadSDNode *LD = cast<LoadSDNode>(InChain);
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
return false;
OutChain = LD->getChain();
return true;
}
}
// Also handle the case where we explicitly require zeros in the top
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
Fix a long standing deficiency in the X86 backend: we would sometimes emit "zero" and "all one" vectors multiple times, for example: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 pcmpeqd %mm0, %mm0 movq %mm0, _M2 ret instead of: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 movq %mm0, _M2 ret This patch fixes this by always arranging for zero/one vectors to be defined as v4i32 or v2i32 (SSE/MMX) instead of letting them be any random type. This ensures they get trivially CSE'd on the dag. This fix is also important for LegalizeDAGTypes, as it gets unhappy when the x86 backend wants BUILD_VECTOR(i64 0) to be legal even when 'i64' isn't legal. This patch makes the following changes: 1) X86TargetLowering::LowerBUILD_VECTOR now lowers 0/1 vectors into their canonical types. 2) The now-dead patterns are removed from the SSE/MMX .td files. 3) All the patterns in the .td file that referred to immAllOnesV or immAllZerosV in the wrong form now use *_bc to match them with a bitcast wrapped around them. 4) X86DAGToDAGISel::SelectScalarSSELoad is generalized to handle bitcast'd zero vectors, which simplifies the code actually. 5) getShuffleVectorZeroOrUndef is updated to generate a shuffle that is legal, instead of generating one that is illegal and expecting a later legalize pass to clean it up. 6) isZeroShuffle is generalized to handle bitcast of zeros. 7) several other minor tweaks. This patch is definite goodness, but has the potential to cause random code quality regressions. Please be on the lookout for these and let me know if they happen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44310 91177308-0d34-0410-b5e6-96231b3b80d8
2007-11-25 00:24:49 +00:00
// Check to see if the top elements are all zeros (or bitcast of zeros).
ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(1).Val->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
N.getOperand(1).getOperand(0).hasOneUse()) {
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
// from the LHS.
Fix a long standing deficiency in the X86 backend: we would sometimes emit "zero" and "all one" vectors multiple times, for example: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 pcmpeqd %mm0, %mm0 movq %mm0, _M2 ret instead of: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 movq %mm0, _M2 ret This patch fixes this by always arranging for zero/one vectors to be defined as v4i32 or v2i32 (SSE/MMX) instead of letting them be any random type. This ensures they get trivially CSE'd on the dag. This fix is also important for LegalizeDAGTypes, as it gets unhappy when the x86 backend wants BUILD_VECTOR(i64 0) to be legal even when 'i64' isn't legal. This patch makes the following changes: 1) X86TargetLowering::LowerBUILD_VECTOR now lowers 0/1 vectors into their canonical types. 2) The now-dead patterns are removed from the SSE/MMX .td files. 3) All the patterns in the .td file that referred to immAllOnesV or immAllZerosV in the wrong form now use *_bc to match them with a bitcast wrapped around them. 4) X86DAGToDAGISel::SelectScalarSSELoad is generalized to handle bitcast'd zero vectors, which simplifies the code actually. 5) getShuffleVectorZeroOrUndef is updated to generate a shuffle that is legal, instead of generating one that is illegal and expecting a later legalize pass to clean it up. 6) isZeroShuffle is generalized to handle bitcast of zeros. 7) several other minor tweaks. This patch is definite goodness, but has the potential to cause random code quality regressions. Please be on the lookout for these and let me know if they happen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44310 91177308-0d34-0410-b5e6-96231b3b80d8
2007-11-25 00:24:49 +00:00
unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
SDOperand ShufMask = N.getOperand(2);
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
if (C->getValue() == VecWidth) {
for (unsigned i = 1; i != VecWidth; ++i) {
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
// ok.
} else {
ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
if (C->getValue() >= VecWidth) return false;
}
}
}
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
return false;
OutChain = LD->getChain();
InChain = SDOperand(LD, 1);
return true;
}
}
return false;
}
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
AM.Base.Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
AM.IndexReg = CurDAG->getRegister(0, VT);
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
// a simple shift.
if (AM.Scale > 1)
Complexity++;
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
// to a LEA. This is determined with some expermentation but is by no means
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
// For X86-64, we should always use lea to materialize RIP relative
// addresses.
if (Subtarget->is64Bit())
Complexity = 4;
else
Complexity += 2;
}
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
if (Complexity > 2) {
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
return false;
}
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
if (ISD::isNON_EXTLoad(N.Val) &&
N.hasOneUse() &&
CanBeFoldedBy(N.Val, P.Val, P.Val))
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
return false;
}
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineFunction *MF = BB->getParent();
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
const TargetInstrInfo *TII = TM.getInstrInfo();
// Operand of MovePCtoStack is completely ignored by asm printer. It's
// only used in JIT code emission as displacement to pc.
BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
if (TM.getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
.addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
} else {
GlobalBaseReg = PC;
}
}
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
static SDNode *FindCallStartFromCall(SDNode *Node) {
if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
assert(Node->getOperand(0).getValueType() == MVT::Other &&
"Node doesn't have a token chain argument!");
return FindCallStartFromCall(Node->getOperand(0).Val);
}
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
SDOperand SRIdx;
switch (VT) {
case MVT::i8:
SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
// Ensure that the source register has an 8-bit subreg on 32-bit targets
if (!Subtarget->is64Bit()) {
unsigned Opc;
MVT::ValueType VT;
switch (N0.getValueType()) {
default: assert(0 && "Unknown truncate!");
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
break;
}
N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
VT, N0, SRIdx, N0.getValue(1));
}
break;
case MVT::i16:
SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
case MVT::i32:
SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
break;
default: assert(0 && "Unknown truncate!"); break;
}
return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
SDNode *Node = N.Val;
MVT::ValueType NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
#ifndef NDEBUG
DOUT << std::string(Indent, ' ') << "Selecting: ";
DEBUG(Node->dump(CurDAG));
DOUT << "\n";
Indent += 2;
#endif
if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "== ";
DEBUG(Node->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return NULL; // Already selected.
}
switch (Opcode) {
default: break;
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
case X86ISD::FP_GET_RESULT2: {
SDOperand Chain = N.getOperand(0);
SDOperand InFlag = N.getOperand(1);
AddToISelQueue(Chain);
AddToISelQueue(InFlag);
std::vector<MVT::ValueType> Tys;
Tys.push_back(MVT::f80);
Tys.push_back(MVT::f80);
Tys.push_back(MVT::Other);
Tys.push_back(MVT::Flag);
SDOperand Ops[] = { Chain, InFlag };
SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
Ops, 2);
Chain = SDOperand(ResNode, 2);
InFlag = SDOperand(ResNode, 3);
ReplaceUses(SDOperand(N.Val, 2), Chain);
ReplaceUses(SDOperand(N.Val, 3), InFlag);
return ResNode;
}
case ISD::ADD: {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
// In 64-bit small code size mode, use LEA to take advantage of
// RIP-relative addressing.
if (TM.getCodeModel() != CodeModel::Small)
break;
MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
if (N.Val->getValueType(0) == PtrVT &&
N0.getOpcode() == X86ISD::Wrapper &&
N1.getOpcode() == ISD::Constant) {
unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
SDOperand C(0, 0);
// TODO: handle ExternalSymbolSDNode.
if (GlobalAddressSDNode *G =
dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
G->getOffset() + Offset);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
CP->getAlignment(),
CP->getOffset()+Offset);
}
if (C.Val) {
if (Subtarget->is64Bit()) {
SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
CurDAG->getRegister(0, PtrVT), C };
return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
} else
return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
}
}
// Other cases are handled by auto-generated code.
break;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDOperand N0 = Node->getOperand(0);
SDOperand N1 = Node->getOperand(1);
// There are several forms of IMUL that just return the low part and
// don't have fixed-register operands. If we don't need the high part,
// use these instead. They can be selected with the generated ISel code.
if (NVT != MVT::i8 &&
N.getValue(1).use_empty()) {
N = CurDAG->getNode(ISD::MUL, NVT, N0, N1);
break;
}
bool isSigned = Opcode == ISD::SMUL_LOHI;
if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
else
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
unsigned LoReg, HiReg;
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
// multiplty is commmutative
if (!foldedLoad) {
foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
if (foldedLoad)
std::swap(N0, N1);
}
AddToISelQueue(N0);
SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
N0, SDOperand()).getValue(1);
if (foldedLoad) {
AddToISelQueue(N1.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
InFlag = SDOperand(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
} else {
AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
// Copy the low half of the result, if it is needed.
if (!N.getValue(0).use_empty()) {
SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
// Copy the high half of the result, if it is needed.
if (!N.getValue(1).use_empty()) {
SDOperand Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
#ifndef NDEBUG
Indent -= 2;
#endif
return NULL;
}
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDOperand N0 = Node->getOperand(0);
SDOperand N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
else
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
unsigned LoReg, HiReg;
unsigned ClrOpcode, SExtOpcode;
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; HiReg = X86::AH;
ClrOpcode = 0;
SExtOpcode = X86::CBW;
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
ClrOpcode = X86::MOV16r0;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; HiReg = X86::EDX;
ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; HiReg = X86::RDX;
ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
SDOperand InFlag;
if (NVT == MVT::i8 && !isSigned) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
AddToISelQueue(N0.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
Move =
SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
Ops, 5), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
AddToISelQueue(N0);
Move =
SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
Chain = CurDAG->getEntryNode();
}
Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
InFlag = Chain.getValue(1);
} else {
AddToISelQueue(N0);
InFlag =
CurDAG->getCopyToReg(CurDAG->getEntryNode(),
LoReg, N0, SDOperand()).getValue(1);
if (isSigned) {
// Sign extend the low part into the high part.
InFlag =
SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
} else {
// Zero out the high part, effectively zero extending the input.
SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
ClrNode, InFlag).getValue(1);
}
}
if (foldedLoad) {
AddToISelQueue(N1.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
InFlag = SDOperand(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
} else {
AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
// Copy the division (low) result, if it is needed.
if (!N.getValue(0).use_empty()) {
SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
// Copy the remainder (high) result, if it is needed.
if (!N.getValue(1).use_empty()) {
SDOperand Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
#ifndef NDEBUG
Indent -= 2;
#endif
return NULL;
}
case ISD::ANY_EXTEND: {
SDOperand N0 = Node->getOperand(0);
AddToISelQueue(N0);
if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
SDOperand SRIdx;
switch(N0.getValueType()) {
case MVT::i32:
SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
break;
case MVT::i16:
SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
case MVT::i8:
if (Subtarget->is64Bit())
SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
break;
default: assert(0 && "Unknown any_extend!");
}
if (SRIdx.Val) {
SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
NVT, N0, SRIdx);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
} // Otherwise let generated ISel handle it.
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDOperand N0 = Node->getOperand(0);
AddToISelQueue(N0);
MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
unsigned Opc = 0;
switch (NVT) {
case MVT::i16:
if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
else assert(0 && "Unknown sign_extend_inreg!");
break;
case MVT::i32:
switch (SVT) {
case MVT::i8: Opc = X86::MOVSX32rr8; break;
case MVT::i16: Opc = X86::MOVSX32rr16; break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
break;
case MVT::i64:
switch (SVT) {
case MVT::i8: Opc = X86::MOVSX64rr8; break;
case MVT::i16: Opc = X86::MOVSX64rr16; break;
case MVT::i32: Opc = X86::MOVSX64rr32; break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
break;
default: assert(0 && "Unknown sign_extend_inreg!");
}
SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(TruncOp.Val->dump(CurDAG));
DOUT << "\n";
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
break;
}
case ISD::TRUNCATE: {
SDOperand Input = Node->getOperand(0);
AddToISelQueue(Node->getOperand(0));
SDNode *ResNode = getTruncate(Input, NVT);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
break;
}
}
SDNode *ResNode = SelectCode(N);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
if (ResNode == NULL || ResNode == N.Val)
DEBUG(N.Val->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif
return ResNode;
}
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
SDOperand Op0, Op1, Op2, Op3;
switch (ConstraintCode) {
case 'o': // offsetable ??
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
return true;
break;
}
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
OutOps.push_back(Op3);
AddToISelQueue(Op0);
AddToISelQueue(Op1);
AddToISelQueue(Op2);
AddToISelQueue(Op3);
return false;
}
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
return new X86DAGToDAGISel(TM, Fast);
}