llvm-6502/lib/Target/NVPTX/VectorElementize.cpp
//===-- VectorElementize.cpp - Convert vector ops to their element ops ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass converts operations on vector types to operations on their
// element types.
//
// For generic binary and unary vector instructions, the conversion is simple.
// Suppose we have
// av = bv Vop cv
// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
// This gets converted to the following:
// a1 = b1 Sop c1
// a2 = b2 Sop c2
//
// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
// For the above example, the map will look as follows:
// av => [a1, a2]
// bv => [b1, b2]
//
// In addition, getScalarVersion provides the following opcode->opcode map.
// Vop => Sop
// OtherVop => OtherSop
// ...
//
// For vector-specific instructions like vecbuild, vecshuffle, etc., the
// conversion is different. Look at the comments near the functions with the
// prefix createVec<...>.
//
//===----------------------------------------------------------------------===//
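// As a concrete illustration (using the opcode mapping in getScalarVersion()
// below, e.g. VAddV2I32 -> ADDi32rr), a v2i32 add such as
//   av = VAddV2I32 bv, cv
// is rewritten into one scalar add per element, operating on the scalar
// registers recorded in VectorToScalarMap:
//   a1 = ADDi32rr b1, c1
//   a2 = ADDi32rr b2, c2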
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
namespace {
class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
  virtual bool runOnMachineFunction(MachineFunction &F);

  NVPTXTargetMachine &TM;
  MachineRegisterInfo *MRI;
  const NVPTXRegisterInfo *RegInfo;
  const NVPTXInstrInfo *InstrInfo;

  llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
      RegClassMap;
  llvm::DenseMap<unsigned, bool> SimpleMoveMap;
  llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;

  bool isVectorInstr(MachineInstr *);
  SmallVector<unsigned, 4> getScalarRegisters(unsigned);
  unsigned getScalarVersion(unsigned);
  unsigned getScalarVersion(MachineInstr *);
  bool isVectorRegister(unsigned);
  const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
  unsigned numCopiesNeeded(MachineInstr *);

  void createLoadCopy(MachineFunction&, MachineInstr *,
                      std::vector<MachineInstr *>&);
  void createStoreCopy(MachineFunction&, MachineInstr *,
                       std::vector<MachineInstr *>&);
  void createVecDest(MachineFunction&, MachineInstr *,
                     std::vector<MachineInstr *>&);
  void createCopies(MachineFunction&, MachineInstr *,
                    std::vector<MachineInstr *>&);

  unsigned copyProp(MachineFunction&);
  unsigned removeDeadMoves(MachineFunction&);
  void elementize(MachineFunction&);
  bool isSimpleMove(MachineInstr *);

  void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);
  void createVecExtract(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);
  void createVecInsert(MachineFunction& F, MachineInstr *Instr,
                       std::vector<MachineInstr *>& copies);
  void createVecBuild(MachineFunction& F, MachineInstr *Instr,
                      std::vector<MachineInstr *>& copies);

public:
  static char ID; // Pass identification, replacement for typeid
  VectorElementize(NVPTXTargetMachine &tm)
      : MachineFunctionPass(ID), TM(tm) {}

  virtual const char *getPassName() const {
    return "Convert LLVM vector types to their element types";
  }
};
char VectorElementize::ID = 1;
}
static cl::opt<bool>
RemoveRedundantMoves("nvptx-remove-redundant-moves",
    cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
    cl::init(true));
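// Helpers to decode the VecInstType field packed into an instruction's
// TSFlags (masked by NVPTX::VecInstTypeMask and shifted by
// NVPTX::VecInstTypeShift), and to test for each kind of vector instruction.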
#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
                    >> NVPTX::VecInstTypeShift)
#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
bool VectorElementize::isSimpleMove(MachineInstr *mi) {
  if (mi->isCopy())
    return true;
  unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
                     >> NVPTX::SimpleMoveShift;
  return (TSFlags == 1);
}
bool VectorElementize::isVectorInstr(MachineInstr *mi) {
  if ((mi->getOpcode() == NVPTX::PHI) ||
      (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
    MachineOperand dest = mi->getOperand(0);
    return isVectorRegister(dest.getReg());
  }
  return ISVECINST(mi);
}

unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
  return getScalarVersion(mi->getOpcode());
}
///=============================================================================
///Instr is assumed to be a vector instruction. For most vector instructions,
///the size of the destination vector register gives the number of scalar copies
///needed. For VecStore, the size of the stored vector register (operand 0)
///gives the number of scalar copies needed. For VecExtract, the dest is a
///scalar, so the source vector (getOperand(1)) gives the number of copies.
///=============================================================================
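// For example, a VAddV4I32 defines a 4-element vector register, so
// numCopiesNeeded() returns 4 and four scalar copies are generated for it.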
unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
  unsigned numDefs = 0;
  unsigned def;
  for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
    MachineOperand oper = Instr->getOperand(i);

    if (!oper.isReg()) continue;
    if (!oper.isDef()) continue;

    def = i;
    numDefs++;
  }
  assert((numDefs <= 1) && "Only 0 or 1 defs supported");

  if (numDefs == 1) {
    unsigned regnum = Instr->getOperand(def).getReg();
    if (ISVECEXTRACT(Instr))
      regnum = Instr->getOperand(1).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  else if (numDefs == 0) {
    assert(ISVECSTORE(Instr)
           && "Only 0 def instruction supported is vector store");
    unsigned regnum = Instr->getOperand(0).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  return 1;
}
const TargetRegisterClass *VectorElementize::
getScalarRegClass(const TargetRegisterClass *RC) {
  assert(isNVPTXVectorRegClass(RC) &&
         "Not a vector register class");
  return getNVPTXElemClass(RC);
}

bool VectorElementize::isVectorRegister(unsigned reg) {
  const TargetRegisterClass *RC = MRI->getRegClass(reg);
  return isNVPTXVectorRegClass(RC);
}
///=============================================================================
///For every vector register 'v' that is not already in the VectorToScalarMap,
///create n scalar registers of the corresponding element type, where n
///is 2 or 4 (getNVPTXVectorSize), and add the mapping to VectorToScalarMap.
///=============================================================================
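// For example, the first query for a given 2-element vector vreg creates two
// fresh scalar vregs of the element register class and caches them; later
// queries for the same vreg return the same two scalars.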
SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
  assert(isVectorRegister(regnum) && "Expecting a vector register here");
  // Create the scalar registers and put them in the map, if not already there.
  if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
    const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
    const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);

    SmallVector<unsigned, 4> temp;

    for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
      temp.push_back(MRI->createVirtualRegister(scalarClass));

    VectorToScalarMap[regnum] = temp;
  }
  return VectorToScalarMap[regnum];
}
///=============================================================================
///For a vector load of the form
///va <= ldv2 [addr]
///the following multi-output instruction is created:
///[v1, v2] <= LD [addr]
///Look at NVPTXVector.td for the definitions of multi-output loads.
///=============================================================================
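// For illustration, using the opcode mapping in getScalarVersion() below
// (LD_v2i32_avar -> LDV_i32_v2_avar), a cloned vector load
//   va = LD_v2i32_avar <addr operands>
// has its single vector def replaced by one scalar def per element,
//   v1, v2 = LDV_i32_v2_avar <addr operands>
// where v1 and v2 are the scalars that getScalarRegisters(va) maps to va.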
void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstrBuilder copy(F, copies[0]);
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, which should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy.addReg(scalarRegs[i], RegState::Define);

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy.addOperand(otherOperands[i]);
}
///=============================================================================
///For a vector store of the form
///stv2 va, [addr]
///the following multi-input instruction is created:
///ST v1, v2, [addr]
///Look at NVPTXVector.td for the definitions of multi-input stores.
///=============================================================================
void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstrBuilder copy(F, copies[0]);
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  MachineOperand src = copy->getOperand(0);
  unsigned regnum = src.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy.addReg(scalarRegs[i]);

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy.addOperand(otherOperands[i]);
}
///=============================================================================
///va <= shufflev2 vb, vc, <i1>, <i2>
///gets converted to 2 moves into a1 and a2. The sources of the moves depend on
///i1 and i2. i1 and i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move
///instructions will be
///a1 <= c2
///a2 <= b1
///=============================================================================
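// Index selection, as implemented below: for result element i, the immediate
// 'which' = getOperand(3+i) selects source element (which % numcopies), taken
// from src1 when which < numcopies and from src2 otherwise. In the i1=3, i2=0
// example above (numcopies == 2):
//   i = 0: which = 3 -> src2[1], i.e. a1 <= c2
//   i = 1: which = 0 -> src1[0], i.e. a2 <= b1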
void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned src1regnum = Instr->getOperand(1).getReg();
  unsigned src2regnum = Instr->getOperand(2).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
  SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstrBuilder copy =
        BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
    MachineOperand which = Instr->getOperand(3+i);
    assert(which.isImm() && "Shuffle operand not a constant");

    int src = which.getImm();
    int elem = src % numcopies;

    if (which.getImm() < numcopies)
      copy.addReg(src1[elem]);
    else
      copy.addReg(src2[elem]);
    copies.push_back(copy);
  }
}
///=============================================================================
///a <= extractv2 va, <i1>
///gets turned into a simple move to the scalar register a. The source depends
///on i1.
///=============================================================================
void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which = Instr->getOperand(2);
  assert(which.isImm() && "Extract operand not a constant");

  DebugLoc DL = Instr->getDebugLoc();
  copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
                           Instr->getOperand(0).getReg())
                       .addReg(src[which.getImm()]));
}
///=============================================================================
///va <= vecinsertv2 vb, c, <i1>
///This instruction copies all elements of vb to va, except the 'i1'th element.
///The scalar value c becomes the 'i1'th element of va.
///This gets translated to 2 (4 for vecinsertv4) moves.
///=============================================================================
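// For illustration, a 2-element insert with i1 = 1,
//   va <= vecinsertv2 vb, c, <1>
// becomes the two moves
//   a1 <= b1
//   a2 <= c
// i.e. element i1 comes from the scalar operand c, and every other element is
// copied from the corresponding element of vb.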
void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which = Instr->getOperand(3);
  assert(which.isImm() && "Insert operand not a constant");
  unsigned int elem = which.getImm();

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstrBuilder copy =
        BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);

    if (i != elem)
      copy.addReg(src[i]);
    else
      copy.addOperand(Instr->getOperand(2));

    copies.push_back(copy);
  }
}
///=============================================================================
///va <= buildv2 b1, b2
///gets translated to
///a1 <= b1
///a2 <= b2
///=============================================================================
void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++)
    copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
                             dest[i])
                         .addOperand(Instr->getOperand(1+i)));
}
///=============================================================================
///For a tex inst of the form
///va <= op [scalar operands]
///the following multi-output instruction is created:
///[v1, v2] <= op' [scalar operands]
///=============================================================================
void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
                                     std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstrBuilder copy(F, copies[0]);
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, which should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy.addReg(scalarRegs[i], RegState::Define);

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy.addOperand(otherOperands[i]);
}
///=============================================================================
///Look at the vector instruction type and dispatch to the createVec<...>
///function that creates the scalar copies.
///=============================================================================
void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
                                    std::vector<MachineInstr *>& copies) {
  if (ISVECLOAD(Instr)) {
    createLoadCopy(F, Instr, copies);
    return;
  }
  if (ISVECSTORE(Instr)) {
    createStoreCopy(F, Instr, copies);
    return;
  }
  if (ISVECSHUFFLE(Instr)) {
    createVecShuffle(F, Instr, copies);
    return;
  }
  if (ISVECEXTRACT(Instr)) {
    createVecExtract(F, Instr, copies);
    return;
  }
  if (ISVECINSERT(Instr)) {
    createVecInsert(F, Instr, copies);
    return;
  }
  if (ISVECDEST(Instr)) {
    createVecDest(F, Instr, copies);
    return;
  }
  if (ISVECBUILD(Instr)) {
    createVecBuild(F, Instr, copies);
    return;
  }

  unsigned numcopies = numCopiesNeeded(Instr);

  for (unsigned i=0; i<numcopies; ++i)
    copies.push_back(F.CloneMachineInstr(Instr));

  for (unsigned i=0; i<numcopies; ++i) {
    MachineInstrBuilder copy(F, copies[i]);

    std::vector<MachineOperand> allOperands;
    std::vector<bool> isDef;

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
      MachineOperand oper = copy->getOperand(j);
      allOperands.push_back(oper);
      if (oper.isReg())
        isDef.push_back(oper.isDef());
      else
        isDef.push_back(false);
    }

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
      copy->RemoveOperand(0);

    copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));

    for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
      MachineOperand oper = allOperands[j];
      if (oper.isReg()) {
        unsigned regnum = oper.getReg();
        if (isVectorRegister(regnum)) {
          SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
          copy.addReg(scalarRegs[i], getDefRegState(isDef[j]));
        }
        else
          copy.addOperand(oper);
      }
      else
        copy.addOperand(oper);
    }
  }
}
///=============================================================================
///Scan through all basic blocks, looking for vector instructions.
///For each vector instruction I, insert the scalar copies before I, and
///add I to the toRemove vector. Finally, remove all instructions in toRemove.
///=============================================================================
void VectorElementize::elementize(MachineFunction &F) {
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
       BI!=BE; ++BI) {
    MachineBasicBlock *BB = &*BI;

    std::vector<MachineInstr *> copies;
    std::vector<MachineInstr *> toRemove;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
         II!=IE; ++II) {
      MachineInstr *Instr = &*II;

      if (!isVectorInstr(Instr))
        continue;

      copies.clear();
      createCopies(F, Instr, copies);
      for (unsigned i=0, e=copies.size(); i!=e; ++i)
        BB->insert(II, copies[i]);

      assert((copies.size() > 0) && "Problem in createCopies");
      toRemove.push_back(Instr);
    }
    for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
      F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
  }
}
///=============================================================================
///a <= b
///...
///...
///x <= op(a, ...)
///gets converted to
///
///x <= op(b, ...)
///The original move is still present. This works on SSA form machine code.
///Note that a <= b should be a simple vreg-to-vreg move instruction.
///TBD: I didn't find a function that can replace a single operand, so I remove
///all operands and add them back, substituting the replacement while adding.
///=============================================================================
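// Note: the simple move itself is left in place here. If copy propagation
// makes it unused, removeDeadMoves(), driven by the loop in
// runOnMachineFunction(), deletes it on a subsequent iteration.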
unsigned VectorElementize::copyProp(MachineFunction &F) {
  unsigned numReplacements = 0;

  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
       ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
         ++II) {
      MachineInstr *Instr = &*II;

      // Don't do copy propagation on PHI as it will cause unnecessary
      // live range overlap.
      if ((Instr->getOpcode() == TargetOpcode::PHI) ||
          (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
        continue;

      bool needsReplacement = false;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        if (!oper.isReg()) continue;
        if (oper.isDef()) continue;
        if (!RegInfo->isVirtualRegister(oper.getReg())) continue;

        MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());

        if (!defInstr) continue;

        if (!isSimpleMove(defInstr)) continue;

        MachineOperand defSrc = defInstr->getOperand(1);
        if (!defSrc.isReg()) continue;
        if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;

        needsReplacement = true;
      }
      if (!needsReplacement) continue;

      numReplacements++;

      std::vector<MachineOperand> operands;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        bool flag = false;
        do {
          if (!(oper.isReg()))
            break;
          if (oper.isDef())
            break;
          if (!(RegInfo->isVirtualRegister(oper.getReg())))
            break;
          MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
          if (!(isSimpleMove(defInstr)))
            break;
          MachineOperand defSrc = defInstr->getOperand(1);
          if (!(defSrc.isReg()))
            break;
          if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
            break;
          operands.push_back(defSrc);
          flag = true;
        } while (0);
        if (flag == false)
          operands.push_back(oper);
      }

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
        Instr->RemoveOperand(0);
      for (unsigned i=0, e=operands.size(); i!=e; ++i)
        Instr->addOperand(F, operands[i]);
    }
  }
  return numReplacements;
}
///=============================================================================
///Look for simple vreg-to-vreg moves whose destination register has no uses
///(use_empty()), add them to the deadMoves vector, and then remove all
///instructions in deadMoves.
///=============================================================================
unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
  std::vector<MachineInstr *> deadMoves;
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
       ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
         ++II) {
      MachineInstr *Instr = &*II;

      if (!isSimpleMove(Instr)) continue;

      MachineOperand dest = Instr->getOperand(0);
      assert(dest.isReg() && "dest of move not a register");
      assert(RegInfo->isVirtualRegister(dest.getReg()) &&
             "dest of move not a virtual register");

      if (MRI->use_empty(dest.getReg())) {
        deadMoves.push_back(Instr);
      }
    }
  }
  for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
    F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));

  return deadMoves.size();
}
///=============================================================================
///Main function for this pass.
///=============================================================================
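// elementize() performs the rewriting; when -nvptx-remove-redundant-moves is
// enabled, copyProp() and removeDeadMoves() are then iterated to a fixed
// point to clean up the vreg-to-vreg moves that the rewriting introduces.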
bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
  MRI = &F.getRegInfo();

  RegInfo = TM.getRegisterInfo();
  InstrInfo = TM.getInstrInfo();

  VectorToScalarMap.clear();

  elementize(F);

  if (RemoveRedundantMoves)
    while (1) {
      if (copyProp(F) == 0) break;
      removeDeadMoves(F);
    }

  return true;
}

FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
  return new VectorElementize(tm);
}
unsigned VectorElementize::getScalarVersion(unsigned opcode) {
if (opcode == NVPTX::PHI)
return opcode;
if (opcode == NVPTX::IMPLICIT_DEF)
return opcode;
switch(opcode) {
default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
case TargetOpcode::COPY: return TargetOpcode::COPY;
case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
case NVPTX::VNegV2I16: return NVPTX::INEG16;
case NVPTX::VNegV2I32: return NVPTX::INEG32;
case NVPTX::VNegV2I64: return NVPTX::INEG64;
case NVPTX::VNegV2I8: return NVPTX::INEG8;
case NVPTX::VNegV4I16: return NVPTX::INEG16;
case NVPTX::VNegV4I32: return NVPTX::INEG32;
case NVPTX::VNegV4I8: return NVPTX::INEG8;
case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
case NVPTX::VNotV2I16: return NVPTX::NOT16;
case NVPTX::VNotV2I32: return NVPTX::NOT32;
case NVPTX::VNotV2I64: return NVPTX::NOT64;
case NVPTX::VNotV2I8: return NVPTX::NOT8;
case NVPTX::VNotV4I16: return NVPTX::NOT16;
case NVPTX::VNotV4I32: return NVPTX::NOT32;
case NVPTX::VNotV4I8: return NVPTX::NOT8;
case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
}
return 0;
}