llvm-6502/lib/Target/PTX/PTXISelLowering.cpp
Duncan Sands 28b77e968d Add codegen support for vector select (in the IR this means a select
with a vector condition); such selects become VSELECT codegen nodes.
This patch also removes VSETCC codegen nodes, unifying them with SETCC
nodes (codegen was actually often using SETCC for vector SETCC already).
This ensures that various DAG combiner optimizations kick in for vector
comparisons.  Passes dragonegg bootstrap with no testsuite regressions
(nightly testsuite as well as "make check-all").  Patch mostly by
Nadav Rotem.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139159 91177308-0d34-0410-b5e6-96231b3b80d8
2011-09-06 19:07:46 +00:00

398 lines
13 KiB
C++

//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "PTXISelLowering.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "PTXGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
// (any/zero/sign) extload => load + (any/zero/sign) extend
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
// f32 extload => load + fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// f64 truncstore => trunc + store
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// sign_extend_inreg => sign_extend
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// br_cc => brcond
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// select_cc => setcc
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
// customise setcc to use bitwise logic if possible
setOperationAction(ISD::SETCC, MVT::i1, Custom);
// customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// Compute derived properties from the register classes
computeRegisterProperties();
}
EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unimplemented operand");
case ISD::SETCC:
return LowerSETCC(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
}
}
const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
llvm_unreachable("Unknown opcode");
case PTXISD::COPY_ADDRESS:
return "PTXISD::COPY_ADDRESS";
case PTXISD::LOAD_PARAM:
return "PTXISD::LOAD_PARAM";
case PTXISD::STORE_PARAM:
return "PTXISD::STORE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
return "PTXISD::RET";
case PTXISD::CALL:
return "PTXISD::CALL";
}
}
//===----------------------------------------------------------------------===//
// Custom Lower Operation
//===----------------------------------------------------------------------===//
SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
}
return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
}
SDValue PTXTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
dl,
PtrVT.getSimpleVT(),
targetGlobal);
return movInstr;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
if (isVarArg) llvm_unreachable("PTX does not support varargs");
MachineFunction &MF = DAG.getMachineFunction();
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
switch (CallConv) {
default:
llvm_unreachable("Unsupported calling convention");
break;
case CallingConv::PTX_Kernel:
MFI->setKernel(true);
break;
case CallingConv::PTX_Device:
MFI->setKernel(false);
break;
}
// We do one of two things here:
// IsKernel || SM >= 2.0 -> Use param space for arguments
// SM < 2.0 -> Use registers for arguments
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
"Kernels cannot take pred operands");
SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
DAG.getTargetConstant(i, MVT::i32));
InVals.push_back(ArgValue);
// Instead of storing a physical register in our argument list, we just
// store the total size of the parameter, in bits. The ASM printer
// knows how to process this.
MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
}
}
else {
// For device functions, we use the PTX calling convention to do register
// assignments then create CopyFromReg ISDs for the allocated registers
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign& VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
TargetRegisterClass* TRC = 0;
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
}
else {
llvm_unreachable("Unknown parameter type");
}
unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
InVals.push_back(ArgValue);
MFI->addArgReg(VA.getLocReg());
}
}
return Chain;
}
SDValue PTXTargetLowering::
LowerReturn(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl,
SelectionDAG &DAG) const {
if (isVarArg) llvm_unreachable("PTX does not support varargs");
switch (CallConv) {
default:
llvm_unreachable("Unsupported calling convention.");
case CallingConv::PTX_Kernel:
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
//assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
SDValue Flag;
// Even though we could use the .param space for return arguments for
// device functions if SM >= 2.0 and the number of return arguments is
// only 1, we just always use registers since this makes the codegen
// easier.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign& VA = RVLocs[i];
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
unsigned Reg = VA.getLocReg();
DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad
Flag = Chain.getValue(1);
MFI->addRetReg(Reg);
}
if (Flag.getNode() == 0) {
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
}
else {
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
}
}
SDValue
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
"Calls are not handled for the target device");
// Is there a more "LLVM"-way to create a variable-length array of values?
SDValue* ops = new SDValue[OutVals.size() + 2];
ops[0] = Chain;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
ops[1] = Callee;
} else {
assert(false && "Function must be a GlobalAddressSDNode");
}
for (unsigned i = 0; i != OutVals.size(); ++i) {
unsigned Size = OutVals[i].getValueType().getSizeInBits();
SDValue Index = DAG.getTargetConstant(MFI->getNextParam(Size), MVT::i32);
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
Index, OutVals[i]);
ops[i+2] = Index;
}
ops[0] = Chain;
Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, ops, OutVals.size()+2);
delete [] ops;
return Chain;
}