AMDGPU: Add core backend files for R600/SI codegen v6

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160270 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-23 01:24:30 +00:00 · 2012-07-16 14:17:08 +00:00
parent 9db5b5ffa9
commit 23dc769a9b
114 changed files with 28329 additions and 0 deletions
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@ -0,0 +1,35 @@
 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPU_H
 #define AMDGPU_H
 #include "AMDGPUTargetMachine.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetMachine.h"
 namespace llvm {
 class FunctionPass;
 class AMDGPUTargetMachine;
 // Factory functions for the AMDGPU backend passes.  Only
 // createAMDGPUConvertToISAPass() is defined in AMDGPUConvertToISA.cpp; the
 // other definitions live in their own source files.
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const TargetData* TD);
 FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
 // SI Passes
 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 // Passes common to R600 and SI
 /// Creates the pass that runs AMDGPUInstrInfo::convertToISA() on every
 /// machine instruction of a function.
 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
 } // End namespace llvm
 #endif // AMDGPU_H
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@ -0,0 +1,21 @@
 //===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 // Include AMDIL TD files
 include "AMDILBase.td"
 include "AMDILVersion.td"
 // Include AMDGPU TD files
 include "R600Schedule.td"
 include "SISchedule.td"
 include "Processors.td"
 include "AMDGPUInstrInfo.td"
 include "AMDGPUIntrinsics.td"
 include "AMDGPURegisterInfo.td"
 include "AMDGPUInstructions.td"
--- a/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
+++ b/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
@ -0,0 +1,63 @@
 //===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This pass lowers AMDIL machine instructions to the appropriate hardware
 // instructions. 
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "AMDGPUInstrInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include <stdio.h>
 using namespace llvm;
 namespace {
 /// Machine function pass that visits every instruction of a function and
 /// forwards it to AMDGPUInstrInfo::convertToISA().
 class AMDGPUConvertToISAPass : public MachineFunctionPass {
 public:
  AMDGPUConvertToISAPass(TargetMachine &tm)
    : MachineFunctionPass(ID), TM(tm) { }

  virtual bool runOnMachineFunction(MachineFunction &MF);

  virtual const char *getPassName() const {
    return "AMDGPU Convert to ISA";
  }

 private:
  /// Its address is handed to the MachineFunctionPass constructor.
  static char ID;
  /// Target machine whose instruction info performs the conversion.
  TargetMachine &TM;
 };
 } // End anonymous namespace
 char AMDGPUConvertToISAPass::ID = 0;
 /// Returns a newly heap-allocated AMDGPUConvertToISAPass bound to \p tm.
 /// Nothing in this file deletes the returned object.
 FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
  FunctionPass *Pass = new AMDGPUConvertToISAPass(tm);
  return Pass;
 }
 /// Walk every instruction of every basic block in \p MF and pass it to
 /// AMDGPUInstrInfo::convertToISA().  Always returns false.
 bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
 {
  const AMDGPUInstrInfo *InstrInfo =
      static_cast<const AMDGPUInstrInfo *>(TM.getInstrInfo());

  for (MachineFunction::iterator BlockIt = MF.begin(), BlockEnd = MF.end();
       BlockIt != BlockEnd; ++BlockIt) {
    for (MachineBasicBlock::iterator InstIt = BlockIt->begin(),
                                     InstEnd = BlockIt->end();
         InstIt != InstEnd; ++InstIt) {
      InstrInfo->convertToISA(*InstIt, MF, BlockIt->findDebugLoc(InstIt));
    }
  }
  return false;
 }
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -0,0 +1,393 @@
 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This is the parent TargetLowering class for hardware code gen targets.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUISelLowering.h"
 #include "AMDILIntrinsicInfo.h"
 #include "AMDGPUUtil.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 /// Registers the operation actions this class adds after the
 /// AMDILTargetLowering base constructor has run.
 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  AMDILTargetLowering(TM)
 {
  // Operations dispatched through LowerOperation() in this class.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);

  // Stand-alone 32-bit unsigned divide and remainder are expanded.
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // Library-style floating point operations: these would default to Expand,
  // but there are instructions for them.
  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 }
 /// Dispatch custom lowering for the opcodes handled by this class; every
 /// other opcode is forwarded to AMDILTargetLowering::LowerOperation().
 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
    const
 {
  const unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN)
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  if (Opcode == ISD::SELECT_CC)
    return LowerSELECT_CC(Op, DAG);
  if (Opcode == ISD::UDIVREM)
    return LowerUDIVREM(Op, DAG);
  return AMDILTargetLowering::LowerOperation(Op, DAG);
 }
 /// Lower a chain-less intrinsic call.  Operand 0 of \p Op is the intrinsic
 /// ID; the remaining operands are the intrinsic's arguments.  Intrinsics
 /// that are not listed below are returned unchanged.
 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    SelectionDAG &DAG) const
 {
  const unsigned IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  // For intrinsics that map one-to-one onto a DAG node, the switch only
  // records the node opcode and how many arguments are forwarded.
  unsigned NodeOpcode = 0;
  unsigned NumArgs = 0;
  switch (IntrinsicID) {
    default:
      return Op;

    // Intrinsics with a dedicated lowering helper.
    case AMDGPUIntrinsic::AMDIL_abs:
      return LowerIntrinsicIABS(Op, DAG);
    case AMDGPUIntrinsic::AMDGPU_lrp:
      return LowerIntrinsicLRP(Op, DAG);

    // One argument.
    case AMDGPUIntrinsic::AMDIL_exp:
      NodeOpcode = ISD::FEXP2; NumArgs = 1; break;
    case AMDGPUIntrinsic::AMDIL_fabs:
      NodeOpcode = ISD::FABS; NumArgs = 1; break;
    case AMDGPUIntrinsic::AMDIL_fraction:
      NodeOpcode = AMDGPUISD::FRACT; NumArgs = 1; break;
    case AMDGPUIntrinsic::AMDIL_round_nearest:
      NodeOpcode = ISD::FRINT; NumArgs = 1; break;
    case AMDGPUIntrinsic::AMDIL_round_posinf:
      NodeOpcode = ISD::FCEIL; NumArgs = 1; break;

    // Two arguments.
    case AMDGPUIntrinsic::AMDIL_max:
      NodeOpcode = AMDGPUISD::FMAX; NumArgs = 2; break;
    case AMDGPUIntrinsic::AMDGPU_imax:
      NodeOpcode = AMDGPUISD::SMAX; NumArgs = 2; break;
    case AMDGPUIntrinsic::AMDGPU_umax:
      NodeOpcode = AMDGPUISD::UMAX; NumArgs = 2; break;
    case AMDGPUIntrinsic::AMDIL_min:
      NodeOpcode = AMDGPUISD::FMIN; NumArgs = 2; break;
    case AMDGPUIntrinsic::AMDGPU_imin:
      NodeOpcode = AMDGPUISD::SMIN; NumArgs = 2; break;
    case AMDGPUIntrinsic::AMDGPU_umin:
      NodeOpcode = AMDGPUISD::UMIN; NumArgs = 2; break;

    // Three arguments.
    case AMDGPUIntrinsic::AMDIL_mad:
      NodeOpcode = AMDILISD::MAD; NumArgs = 3; break;
  }

  if (NumArgs == 1) {
    return DAG.getNode(NodeOpcode, DL, VT, Op.getOperand(1));
  }
  if (NumArgs == 2) {
    return DAG.getNode(NodeOpcode, DL, VT, Op.getOperand(1),
                                           Op.getOperand(2));
  }
  return DAG.getNode(NodeOpcode, DL, VT, Op.getOperand(1),
                                         Op.getOperand(2),
                                         Op.getOperand(3));
 }
 /// Integer absolute value, built as IABS(a) = SMAX(sub(0, a), a) where a is
 /// operand 1 of \p Op.
 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
    SelectionDAG &DAG) const
 {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(1);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue Negated = DAG.getNode(ISD::SUB, DL, VT, Zero, Src);
  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Negated, Src);
 }
 /// Linear interpolation.  With a, b, c being operands 1, 2 and 3 of \p Op:
 ///   LRP(a, b, c) = MAD(a, b, (1 - a) * c)
 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
    SelectionDAG &DAG) const
 {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue A = Op.getOperand(1);
  SDValue One = DAG.getConstantFP(1.0f, MVT::f32);
  // (1 - a)
  SDValue OneMinusA = DAG.getNode(ISD::FSUB, DL, VT, One, A);
  // (1 - a) * c
  SDValue Scaled = DAG.getNode(ISD::FMUL, DL, VT, OneMinusA,
                               Op.getOperand(3));
  // a * b + (1 - a) * c
  return DAG.getNode(AMDILISD::MAD, DL, VT, A, Op.getOperand(2), Scaled);
 }
 /// Custom lowering for ISD::SELECT_CC.
 ///
 /// Operands of \p Op: 0 = LHS, 1 = RHS, 2 = value selected when the compare
 /// is true, 3 = value selected when it is false, 4 = condition code.
 ///
 /// If the compared type differs from the result type, LHS and RHS are first
 /// converted to the result type.  A SELECT_CC that already selects between
 /// the hardware TRUE and FALSE constants is re-emitted as is; anything else
 /// becomes a SELECT_CC producing hardware TRUE/FALSE followed by a SELECT.
 SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
    SelectionDAG &DAG) const
 {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  // Only used by the disabled (#if 0) code further down.
  SDValue Temp;
  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.
  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      // NOTE(review): when asserts are compiled out, ConversionOp is still
      // ISD::DELETED_NODE when the getNode() calls below are reached.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }
  // If True is a hardware TRUE value and False is a hardware FALSE value
  // we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }
  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.  Not implemented yet: the body of
  // this check is intentionally empty.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }
  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.)
 #if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
 #endif
  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  // Hardware TRUE/FALSE are 1.0f/0.0f for f32 and -1/0 for i32 (the same
  // encodings isHWTrueValue() and isHWFalseValue() test for).
  SDValue HWTrue, HWFalse;
  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    // NOTE(review): when asserts are compiled out, HWTrue and HWFalse are
    // still default-constructed SDValues when used below.
    assert(!"Unhandled value type in LowerSELECT_CC");
  }
  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
 }
 /// Custom lowering for ISD::UDIVREM.
 ///
 /// Operand 0 of \p Op is the numerator and operand 1 the denominator.  The
 /// quotient is estimated from the URECIP approximation of 2^32 / Den and then
 /// corrected by at most one in either direction; the remainder is corrected
 /// alongside it.
 ///
 /// \returns a MERGE_VALUES node whose result 0 is the quotient and whose
 /// result 1 is the remainder, matching the two results of UDIVREM.
 ///
 /// NOTE(review): the two ">=" comparisons below use the signed SETGE
 /// condition although the values are otherwise treated as unsigned, and
 /// ISD::UMULO is created with a single result type.  Both are kept as they
 /// were; confirm they are intended.
 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const
 {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);
  // RCP =  URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
  // RCP_HI = mulhu (RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);
  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETGE);
  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                                  DAG.getConstant(0, VT),
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);
  // Calculate Division result:
  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));
  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));
  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                     Quotient, Quotient_A_One, ISD::SETEQ);
  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);
  // Calculate Rem result:
  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);
  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);
  // Return both results together.  The SDNode*/SDValue* overload of
  // ReplaceAllUsesWith() indexes its array argument by result number, so
  // passing the address of a single SDValue for this two-result node would
  // read past the end of that value.
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
 }
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
 /// Returns true if \p Op is the constant the hardware uses for TRUE: exactly
 /// 1.0 for a floating point constant, all bits set for an integer constant.
 /// Any non-constant value yields false.
 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
 {
  const ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op);
  if (FPConst)
    return FPConst->isExactlyValue(1.0);

  const ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
  return IntConst && IntConst->isAllOnesValue();
 }
 /// Returns true if \p Op is the constant the hardware uses for FALSE: a
 /// floating point zero or an integer zero.  Any non-constant value yields
 /// false.
 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
 {
  const ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op);
  if (FPConst)
    return FPConst->getValueAPF().isZero();

  const ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
  return IntConst && IntConst->isNullValue();
 }
 /// Forwards to AMDGPU::utilAddLiveIn() with \p reg as the live-in register
 /// and the register of \p MI's operand 0 as the destination.
 void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
    MachineFunction * MF, MachineRegisterInfo & MRI,
    const TargetInstrInfo * TII, unsigned reg) const
 {
  const unsigned DstReg = MI->getOperand(0).getReg();
  AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, DstReg);
 }
 // Expands to a case label that returns the enumerator's name as a string.
 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
 /// Returns the printable name of an AMDGPUISD node.  Opcodes that are not
 /// AMDGPUISD nodes are forwarded to AMDILTargetLowering::getTargetNodeName().
 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 {
  switch (Opcode) {
  default: return AMDILTargetLowering::getTargetNodeName(Opcode);
  // Keep this list in sync with the AMDGPUISD enum in AMDGPUISelLowering.h.
  NODE_NAME_CASE(BITALIGN)
  NODE_NAME_CASE(FRACT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(SMAX)
  NODE_NAME_CASE(UMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(SMIN)
  NODE_NAME_CASE(UMIN)
  NODE_NAME_CASE(URECIP)
  }
 }
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@ -0,0 +1,77 @@
 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains the interface definition of the TargetLowering class
 // that is common to all AMD GPUs.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPUISELLOWERING_H
 #define AMDGPUISELLOWERING_H
 #include "AMDILISelLowering.h"
 namespace llvm {
 /// TargetLowering base class shared by the AMDGPU hardware targets.  It
 /// extends AMDILTargetLowering with custom lowering for
 /// INTRINSIC_WO_CHAIN, SELECT_CC and UDIVREM.
 class AMDGPUTargetLowering : public AMDILTargetLowering
 {
 private:
  // Custom lowering helpers, dispatched from LowerOperation().
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
 protected:
  /// addLiveIn - This functions adds reg to the live in list of the entry block
  /// and emits a copy from reg to MI.getOperand(0).
  ///
  /// Some registers are loaded with values before the program
  /// begins to execute.  The loading of these values is modeled with pseudo
  /// instructions which are lowered using this function. 
  void addLiveIn(MachineInstr * MI, MachineFunction * MF,
                 MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
 		 unsigned reg) const;
  /// isHWTrueValue - True if Op is the constant 1.0 (floating point) or an
  /// all-ones integer constant.
  bool isHWTrueValue(SDValue Op) const;
  /// isHWFalseValue - True if Op is a floating point or integer zero
  /// constant.
  bool isHWFalseValue(SDValue Op) const;
 public:
  AMDGPUTargetLowering(TargetMachine &TM);
  /// LowerOperation - Lowers the operations handled by this class and
  /// forwards every other opcode to AMDILTargetLowering::LowerOperation().
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  /// LowerIntrinsicIABS - IABS(a) = SMAX(sub(0, a), a)
  SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
  /// LowerIntrinsicLRP - LRP(a, b, c) = muladd(a, b, (1 - a) * c)
  SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
  /// getTargetNodeName - Returns the name of an AMDGPUISD node; other opcodes
  /// are forwarded to AMDILTargetLowering::getTargetNodeName().
  virtual const char* getTargetNodeName(unsigned Opcode) const;
 };
 // DAG node opcodes specific to the AMDGPU targets.  Numbering continues
 // after the last AMDILISD opcode.  The descriptions below mirror the node
 // definitions in AMDGPUInstrInfo.td.
 namespace AMDGPUISD
 {
 enum
 {
  AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
  BITALIGN, // out = ((a << 32) | b) >> c
  FRACT,    // out = a - floor(a)
  FMAX,     // out = max(a, b), a and b are floats
  SMAX,     // out = max(a, b), a and b are signed ints
  UMAX,     // out = max(a, b), a and b are unsigned ints
  FMIN,     // out = min(a, b), a and b are floats
  SMIN,     // out = min(a, b), a and b are signed ints
  UMIN,     // out = min(a, b), a and b are unsigned ints
  URECIP,   // out = (2^32 / a) + e, e is rounding error
  LAST_AMDGPU_ISD_NUMBER
 };
 } // End namespace AMDGPUISD
 } // End namespace llvm
 #endif // AMDGPUISELLOWERING_H
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@ -0,0 +1,46 @@
 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains the implementation of the TargetInstrInfo class that is
 // common to all AMD GPUs.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "AMDIL.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 /// Forwards the target machine to the AMDILInstrInfo base class; no further
 /// state is set up here.
 AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
  : AMDILInstrInfo(tm)
 {
 }
 /// For every virtual register defined by \p MI, replace its register class
 /// with the ISA register class returned by
 /// AMDGPURegisterInfo::getISARegClass().  \p DL is not used by this
 /// implementation.
 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
    DebugLoc DL) const
 {
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  const AMDGPURegisterInfo &TRI = getRegisterInfo();

  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
    MachineOperand &Operand = MI.getOperand(Idx);

    // Only register definitions are of interest.
    if (!Operand.isReg() || !Operand.isDef())
      continue;

    // Register classes of non-virtual registers are left alone.
    const unsigned Reg = Operand.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // Convert dst regclass to one that is supported by the ISA
    const TargetRegisterClass *newRegClass =
        TRI.getISARegClass(RegInfo.getRegClass(Reg));
    assert(newRegClass);
    RegInfo.setRegClass(Reg, newRegClass);
  }
 }
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@ -0,0 +1,46 @@
 //===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains the definition of a TargetInstrInfo class that is common
 // to all AMD GPUs.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPUINSTRUCTIONINFO_H_
 #define AMDGPUINSTRUCTIONINFO_H_
 #include "AMDGPURegisterInfo.h"
 #include "AMDILInstrInfo.h"
 #include <map>
 namespace llvm {
 class AMDGPUTargetMachine;
 class MachineFunction;
 class MachineInstr;
 class MachineInstrBuilder;
 /// Instruction info base class common to the AMD GPU targets.  It is
 /// abstract: getRegisterInfo() is pure virtual here.
 class AMDGPUInstrInfo : public AMDILInstrInfo {
 public:
  explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
  /// getRegisterInfo - Returns the register info object for this target.
  virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
  /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
  /// MachineInstr.  The implementation in AMDGPUInstrInfo.cpp rewrites the
  /// register class of each virtual register defined by MI.
  virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
    DebugLoc DL) const;
 };
 } // End llvm namespace
 #endif // AMDGPUINSTRUCTIONINFO_H_
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@ -0,0 +1,69 @@
 //===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains DAG node definitions for the AMDGPU target.
 //
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Profiles
 //===----------------------------------------------------------------------===//
 // Type profile with one result and three operands: operands 1 and 2 have the
 // same type as the result, and both the result and operand 3 are integers.
 def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
 ]>;
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //===----------------------------------------------------------------------===//
 //
 // out = (((a << 32) | b) >> c)
 //
 // Can be used to optimize rotl:
 // rotl(a, b) = bitalign(a, a, 32 - b)
 def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
 // out = a - floor(a)
 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
 // out = max(a, b) a and b are floats
 def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // out = max(a, b) a and b are signed ints
 def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // out = max(a, b) a and b are unsigned ints
 def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // out = min(a, b) a and b are floats
 def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // out = min(a, b) a and b are signed ints
 def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // out = min(a, b) a and b are unsigned ints
 def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
 >;
 // urecip - This operation is a helper for integer division, it returns the
 // result of 1 / a as a fractional unsigned integer.
 // out = (2^32 / a) + e
 // e is rounding error
 def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@ -0,0 +1,123 @@
 //===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains instruction defs that are common to all hw codegen
 // targets.
 //
 //===----------------------------------------------------------------------===//
 // Base class for all instructions in the AMDGPU namespace.  Besides the
 // usual operand lists, asm string and pattern it carries two extra fields
 // that are copied into TSFlags.
 class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
  // 16-bit value stored in TSFlags bits 63-48.
  field bits<16> AMDILOp = 0;
  // 3-bit value stored in TSFlags bits 42-40.
  field bits<3> Gen = 0;
  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;
  let TSFlags{42-40} = Gen;
  let TSFlags{63-48} = AMDILOp;
 }
 // AMDGPUInst with an additional 32-bit Inst field that defaults to all ones.
 class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
 }
 // Floating point constants stored as IEEE-754 single precision bit patterns.
 class Constants {
 int TWO_PI = 0x40c90fdb;      // 2 * pi     ~= 6.2831855
 int PI = 0x40490fdb;          // pi         ~= 3.1415927
 int TWO_PI_INV = 0x3e22f983;  // 1 / (2*pi) ~= 0.15915494
 }
 def CONST : Constants;
 // Matches a floating point immediate whose value is zero.
 def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
 >;
 // Matches a floating point immediate whose value is exactly 1.0.
 def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
 >;
 // Pseudo instructions, parameterized over a register class, that are
 // expanded by the target's custom inserter.
 let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1  in {
 // Selected for int_AMDIL_clamp when its bounds are the constants 0.0 and 1.0.
 class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
 >;
 // Selected for the fabs node.
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set rc:$dst, (fabs rc:$src0))]
 >;
 // Selected for the fneg node.
 class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set rc:$dst, (fneg rc:$src0))]
 >;
 } // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
 /* Generic helper patterns for intrinsics */
 /* -------------------------------------- */
 // Selects int_AMDGPU_pow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0)))
 // using the instructions passed as template arguments.
 class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
                  RegisterClass rc> : Pat <
  (int_AMDGPU_pow rc:$src0, rc:$src1),
  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
 >;
 /* Other helper patterns */
 /* --------------------- */
 /* Extract element pattern */
 // Selects vector_extract at constant index sub_idx as an EXTRACT_SUBREG of
 // sub-register sub_reg.
 class Extract_Element <ValueType sub_type, ValueType vec_type,
                     RegisterClass vec_class, int sub_idx, 
                     SubRegIndex sub_reg>: Pat<
  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
  (EXTRACT_SUBREG vec_class:$src, sub_reg)
 >;
 /* Insert element pattern */
 // Selects vector_insert at constant index sub_idx as an INSERT_SUBREG into
 // sub-register sub_reg.
 class Insert_Element <ValueType elem_type, ValueType vec_type,
                      RegisterClass elem_class, RegisterClass vec_class,
                      int sub_idx, SubRegIndex sub_reg> : Pat <
  (vec_type (vector_insert (vec_type vec_class:$vec),
                           (elem_type elem_class:$elem), sub_idx)),
  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
 >;
 // Vector Build pattern
 // Selects IL_vbuild as an insert of the element into sub-register sel_x of
 // an otherwise undefined (IMPLICIT_DEF) vector.
 class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
  (IL_vbuild elemClass:$src),
  (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
 >;
 // bitconvert pattern
 // Selects a bitconvert from type st to type dt within register class rc as
 // the source register itself, so no instruction is emitted.
 class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
 >;
 include "R600Instructions.td"
 include "SIInstrInfo.td"
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@ -0,0 +1,64 @@
 //===-- AMDGPUIntrinsics.td - Common intrinsics  -*- tablegen -*-----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines intrinsics that are used by all hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 // Intrinsics with the "AMDGPU" target prefix.  All of them are declared
 // IntrNoMem except int_AMDGPU_kill and int_AMDGPU_kilp, which carry no
 // properties.
 let TargetPrefix = "AMDGPU", isTarget = 1 in {
  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
  def int_AMDGPU_kilp : Intrinsic<[], [], []>;
  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  // Texture intrinsics: the first operand is a v4f32, followed by further
  // vector and/or i32 operands; all return a v4f32.
  def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // 32-bit integer min/max; these are lowered to the SMAX/SMIN/UMAX/UMIN
  // nodes in AMDGPUISelLowering.cpp.
  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
 }
 // Intrinsics with the "TGSI" target prefix.  int_TGSI_lit_z takes three
 // floats, returns one float and is declared without any properties.
 let TargetPrefix = "TGSI", isTarget = 1 in {
  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
 }
 include "SIIntrinsics.td"
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@ -0,0 +1,24 @@
 //===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Parent TargetRegisterInfo class common to all hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
 using namespace llvm;
 // Forwards both arguments to the AMDILRegisterInfo base and stores
 // references to the target machine and the instruction info.
 AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
                                        const TargetInstrInfo &tii)
   : AMDILRegisterInfo(tm, tii), TM(tm), TII(tii)
 {
 }
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@ -0,0 +1,42 @@
 //===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains the TargetRegisterInfo interface that is implemented
 // by all hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPUREGISTERINFO_H_
 #define AMDGPUREGISTERINFO_H_
 #include "AMDILRegisterInfo.h"
 namespace llvm {
 class AMDGPUTargetMachine;
 class TargetInstrInfo;
 /// Abstract register-info base for the hw codegen targets. It extends
 /// AMDILRegisterInfo and leaves getReservedRegs() and getISARegClass() pure
 /// virtual, so only concrete subclasses can be instantiated.
 struct AMDGPURegisterInfo : public AMDILRegisterInfo
 {
  // References captured by the constructor; this struct does not own them.
  AMDGPUTargetMachine &TM;
  const TargetInstrInfo &TII;
  AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
  /// Must be provided by each concrete target.
  virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
  /// getISARegClass - rc is an AMDIL reg class.  This function returns the
  /// ISA reg class that is equivalent to the given AMDIL reg class.
  virtual const TargetRegisterClass *
    getISARegClass(const TargetRegisterClass * rc) const = 0;
 };
 } // End namespace llvm
 #endif // AMDGPUREGISTERINFO_H_
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@ -0,0 +1,22 @@
 //===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Tablegen register definitions common to all hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 let Namespace = "AMDGPU" in {
  def sel_x : SubRegIndex;
  def sel_y : SubRegIndex;
  def sel_z : SubRegIndex;
  def sel_w : SubRegIndex;
 }
 include "R600RegisterInfo.td"
 include "SIRegisterInfo.td"
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@ -0,0 +1,36 @@
 //=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file declares the AMDGPU specific subclass of TargetSubtarget.
 //
 //===----------------------------------------------------------------------===//
 #ifndef _AMDGPUSUBTARGET_H_
 #define _AMDGPUSUBTARGET_H_
 #include "AMDILSubtarget.h"
 namespace llvm {
 /// AMDILSubtarget that additionally keeps the instruction itinerary data
 /// looked up for the CPU name it was constructed with.
 class AMDGPUSubtarget : public AMDILSubtarget
 {
  InstrItineraryData InstrItins;
 public:
  /// \p TT, \p CPU and \p FS are forwarded unchanged to AMDILSubtarget; the
  /// itineraries are then looked up for \p CPU.
  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS)
    : AMDILSubtarget(TT, CPU, FS),
      InstrItins(getInstrItineraryForCPU(CPU))
  {
  }
  /// Returns the itinerary data selected at construction time.
  const InstrItineraryData &getInstrItineraryData() const {
    return InstrItins;
  }
 };
 } // End namespace llvm
 #endif // _AMDGPUSUBTARGET_H_
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@ -0,0 +1,162 @@
 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // The AMDGPU target machine contains all of the hardware specific information
 // needed to emit code for R600 and SI GPUs.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUTargetMachine.h"
 #include "AMDGPU.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
 }
 /// Builds the target machine. All arguments are forwarded to
 /// LLVMTargetMachine; \p TT, \p CPU and \p FS also construct the subtarget,
 /// from which the data layout, stack alignment and itineraries are taken.
 /// The instruction info and lowering objects are picked by device
 /// generation: R600 for generations up to HD6XXX, SI for anything newer.
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
                                          StringRef CPU, StringRef FS,
                                          TargetOptions Options,
                                          Reloc::Model RM, CodeModel::Model CM,
                                          CodeGenOpt::Level OptLevel)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
     Subtarget(TT, CPU, FS),
     DataLayout(Subtarget.getDataLayout()),
     FrameLowering(TargetFrameLowering::StackGrowsUp,
                   Subtarget.device()->getStackAlignment(), 0),
     IntrinsicInfo(this),
     InstrItins(&Subtarget.getInstrItineraryData()),
     mDump(false)
 {
   const bool UsesSIBackend =
       Subtarget.device()->getGeneration() > AMDILDeviceInfo::HD6XXX;
   // TLInfo uses InstrInfo, so InstrInfo is always created first.
   if (UsesSIBackend) {
     InstrInfo = new SIInstrInfo(*this);
     TLInfo = new SITargetLowering(*this);
   } else {
     InstrInfo = new R600InstrInfo(*this);
     TLInfo = new R600TargetLowering(*this);
   }
 }
 /// Releases the instruction info and target lowering objects that the
 /// constructor allocated with new; nothing else in this class frees them.
 AMDGPUTargetMachine::~AMDGPUTargetMachine()
 {
  // TLInfo uses InstrInfo (see the constructor), so destroy it first.
  delete TLInfo;
  delete InstrInfo;
 }
 /// Populates \p PM with the code generation passes followed by the code
 /// emitter that matches the device: the SI emitter when the device name is
 /// "SI", the R600 emitter for generations up to HD6XXX. Any other device
 /// aborts. Returns false once the passes have been added.
 ///
 /// NOTE(review): \p StartAfter and \p StopAfter are accepted but are not
 /// forwarded to the base-class call below.
 bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                               formatted_raw_ostream &Out,
                                               CodeGenFileType FileType,
                                               bool DisableVerify,
                                               AnalysisID StartAfter,
                                               AnalysisID StopAfter) {
  // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
  // only using it to access addPassesToGenerateCode()
  bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
                                                     DisableVerify);
  assert(fail);
  // 'fail' is only read by the assert above; keep release (NDEBUG) builds
  // free of an unused-variable warning.
  (void)fail;
  const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
  std::string gpu = STM.getDeviceName();
  if (gpu == "SI") {
    PM.add(createSICodeEmitterPass(Out));
  } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    PM.add(createR600CodeEmitterPass(Out));
  } else {
    // No emitter exists for this device.
    abort();
    return true;
  }
  PM.add(createGCInfoDeleter());
  return false;
 }
 namespace {
 /// Pass configuration for the AMDGPU target. It overrides the
 /// TargetPassConfig hooks below to insert the AMDIL/R600/SI specific passes;
 /// the hook bodies are defined out of line in this file.
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
  AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}
  /// Returns the target machine downcast to AMDGPUTargetMachine.
  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }
  virtual bool addPreISel();
  virtual bool addInstSelector();
  virtual bool addPreRegAlloc();
  virtual bool addPostRegAlloc();
  virtual bool addPreSched2();
  virtual bool addPreEmitPass();
 };
 } // End of anonymous namespace
 /// Creates the AMDGPU-specific pass configuration bound to this target
 /// machine and \p PM. The returned object is heap allocated.
 TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
  TargetPassConfig *Config = new AMDGPUPassConfig(this, PM);
  return Config;
 }
 bool
 AMDGPUPassConfig::addPreISel()
 {
  const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
  if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    addPass(createR600KernelParametersPass(
                     getAMDGPUTargetMachine().getTargetData()));
  }
  return false;
 }
 /// Adds the AMDIL peephole optimizer followed by the AMDIL DAG instruction
 /// selector, in that order. Always returns false.
 bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDILPeepholeOpt(*TM));
  addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
  return false;
 }
 bool AMDGPUPassConfig::addPreRegAlloc() {
  const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
  if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    addPass(createSIAssignInterpRegsPass(*TM));
  }
  addPass(createAMDGPUConvertToISAPass(*TM));
  return false;
 }
 /// Adds no passes after register allocation; always returns false.
 bool AMDGPUPassConfig::addPostRegAlloc() {
  return false;
 }
 /// Adds no passes before the second scheduling hook; always returns false.
 bool AMDGPUPassConfig::addPreSched2() {
  return false;
 }
 /// Adds the AMDIL CFG preparation pass followed by the AMDIL CFG
 /// structurizer, in that order. Always returns false.
 bool AMDGPUPassConfig::addPreEmitPass() {
  addPass(createAMDILCFGPreparationPass(*TM));
  addPass(createAMDILCFGStructurizerPass(*TM));
  return false;
 }
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@ -0,0 +1,76 @@
 //===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 //  The AMDGPU TargetMachine interface definition for hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPU_TARGET_MACHINE_H
 #define AMDGPU_TARGET_MACHINE_H
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDILFrameLowering.h"
 #include "AMDILIntrinsicInfo.h"
 #include "R600ISelLowering.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Target/TargetData.h"
 namespace llvm {
 MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
 /// Target machine for the hw codegen targets. It holds the subtarget, data
 /// layout, frame lowering and intrinsic info by value, and pointers to the
 /// instruction info and target lowering objects, which are assigned in the
 /// out-of-line constructor.
 class AMDGPUTargetMachine : public LLVMTargetMachine {
  AMDGPUSubtarget Subtarget;
  const TargetData DataLayout;
  AMDILFrameLowering FrameLowering;
  AMDILIntrinsicInfo IntrinsicInfo;
  const AMDGPUInstrInfo * InstrInfo;
  AMDGPUTargetLowering * TLInfo;
  const InstrItineraryData* InstrItins;
  bool mDump;
 public:
   /// The parameter names follow the order used by the out-of-line
   /// definition: target triple, then CPU name, then feature string.
   AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                       StringRef FS,
                       TargetOptions Options,
                       Reloc::Model RM, CodeModel::Model CM,
                       CodeGenOpt::Level OL);
   ~AMDGPUTargetMachine();
   virtual const AMDILFrameLowering* getFrameLowering() const {
     return &FrameLowering;
   }
   virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
     return &IntrinsicInfo;
   }
   virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
   virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
   /// The register info is owned by the instruction info object.
   virtual const AMDGPURegisterInfo *getRegisterInfo() const {
      return &InstrInfo->getRegisterInfo();
   }
   virtual AMDGPUTargetLowering * getTargetLowering() const {
      return TLInfo;
   }
   virtual const InstrItineraryData* getInstrItineraryData() const {
      return InstrItins;
   }
   virtual const TargetData* getTargetData() const { return &DataLayout; }
   /// Returns a newly created AMDGPU pass configuration for \p PM.
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
   /// Adds the codegen passes plus the device-specific code emitter to \p PM.
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                              formatted_raw_ostream &Out,
                                              CodeGenFileType FileType,
                                              bool DisableVerify,
                                              AnalysisID StartAfter = 0,
                                              AnalysisID StopAfter = 0);
 };
 } // End namespace llvm
 #endif // AMDGPU_TARGET_MACHINE_H
--- a/lib/Target/AMDGPU/AMDGPUUtil.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUtil.cpp
@ -0,0 +1,139 @@
 //===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Common utility functions used by hw codegen targets
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUUtil.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDIL.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 // Some instructions act as place holders to emulate operations that the GPU
 // hardware does automatically. This function can be used to check if
 // an opcode falls into this category.
 /// Returns true when \p opcode is one of the placeholder opcodes (RETURN,
 /// LOAD_INPUT, LAST, MASK_WRITE, RESERVE_REG) and false for anything else.
 bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
 {
  switch (opcode) {
  case AMDGPU::RESERVE_REG:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::LAST:
  case AMDGPU::LOAD_INPUT:
  case AMDGPU::RETURN:
    return true;
  default:
    return false;
  }
 }
 /// Returns true when \p opcode is one of the COS, MULLIT/MUL_LIT, EXP_IEEE,
 /// LOG_CLAMPED or LOG_IEEE opcodes listed below, and false otherwise.
 bool AMDGPU::isTransOp(unsigned opcode)
 {
  switch (opcode) {
  // r600 variants
  case AMDGPU::COS_r600:
  case AMDGPU::MUL_LIT_r600:
  case AMDGPU::EXP_IEEE_r600:
  case AMDGPU::LOG_CLAMPED_r600:
  case AMDGPU::LOG_IEEE_r600:
  // eg variants
  case AMDGPU::COS_eg:
  case AMDGPU::MUL_LIT_eg:
  case AMDGPU::EXP_IEEE_eg:
  case AMDGPU::LOG_CLAMPED_eg:
  case AMDGPU::LOG_IEEE_eg:
  // opcode without a suffix
  case AMDGPU::MULLIT:
    return true;
  default:
    return false;
  }
 }
 /// Returns true when \p opcode is one of the TEX_* opcodes listed below,
 /// and false otherwise.
 bool AMDGPU::isTexOp(unsigned opcode)
 {
  switch (opcode) {
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  // sample opcodes
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_C_G:
  // gradient opcodes
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  default:
    return false;
  }
 }
 /// Returns true for the two DOT4 opcodes (r600 and eg), false otherwise.
 bool AMDGPU::isReductionOp(unsigned opcode)
 {
  switch (opcode) {
  case AMDGPU::DOT4_eg:
  case AMDGPU::DOT4_r600:
    return true;
  default:
    return false;
  }
 }
 /// Returns true for the two CUBE opcodes (r600 and eg), false otherwise.
 bool AMDGPU::isCubeOp(unsigned opcode)
 {
  switch (opcode) {
  case AMDGPU::CUBE_eg:
  case AMDGPU::CUBE_r600:
    return true;
  default:
    return false;
  }
 }
 /// Returns true when \p opcode is one of the BREAK/CONTINUE/IF/ELSE/ENDIF/
 /// WHILELOOP/ENDLOOP opcodes listed below, and false otherwise.
 bool AMDGPU::isFCOp(unsigned opcode)
 {
  switch (opcode) {
  // break / continue
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  // if / else / endif
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  // loops
  case AMDGPU::WHILELOOP:
  case AMDGPU::ENDLOOP:
    return true;
  default:
    return false;
  }
 }
 /// Makes \p virtReg carry the value of the physical register \p physReg.
 ///
 /// If \p physReg is already a live-in, every use of \p virtReg is replaced
 /// by the virtual register already associated with it. Otherwise \p physReg
 /// is recorded as a live-in of the function and of its first block, and a
 /// COPY from \p physReg into \p virtReg is inserted at the start of that
 /// block.
 void AMDGPU::utilAddLiveIn(MachineFunction * MF,
                            MachineRegisterInfo & MRI,
                            const TargetInstrInfo * TII,
                            unsigned physReg, unsigned virtReg)
 {
    if (MRI.isLiveIn(physReg)) {
      const unsigned existingVirtReg = MRI.getLiveInVirtReg(physReg);
      MRI.replaceRegWith(virtReg, existingVirtReg);
      return;
    }

    MRI.addLiveIn(physReg, virtReg);
    MF->front().addLiveIn(physReg);
    BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
            TII->get(TargetOpcode::COPY), virtReg).addReg(physReg);
 }
--- a/lib/Target/AMDGPU/AMDGPUUtil.h
+++ b/lib/Target/AMDGPU/AMDGPUUtil.h
@ -0,0 +1,46 @@
 //===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Declarations for utility functions common to all hw codegen targets.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPU_UTIL_H
 #define AMDGPU_UTIL_H
 namespace llvm {
 class MachineFunction;
 class MachineRegisterInfo;
 class TargetInstrInfo;
 namespace AMDGPU {
 // Opcode classification helpers. Each one returns true when the opcode
 // belongs to the fixed list in the matching definition in AMDGPUUtil.cpp.
 bool isPlaceHolderOpcode(unsigned opcode);
 bool isTransOp(unsigned opcode);
 bool isTexOp(unsigned opcode);
 bool isReductionOp(unsigned opcode);
 bool isCubeOp(unsigned opcode);
 bool isFCOp(unsigned opcode);
 // XXX: Move these to AMDGPUInstrInfo.h
 // One bit per flag. These are preprocessor macros, so the enclosing
 // namespace does not scope them.
 #define MO_FLAG_CLAMP (1 << 0)
 #define MO_FLAG_NEG   (1 << 1)
 #define MO_FLAG_ABS   (1 << 2)
 #define MO_FLAG_MASK  (1 << 3)
 // Registers physReg as a function live-in and copies it into virtReg, or
 // reuses the existing live-in virtual register if there already is one.
 void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
    const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
 } // End namespace AMDGPU
 } // End namespace llvm
 #endif // AMDGPU_UTIL_H
--- a/lib/Target/AMDGPU/AMDIL.h
+++ b/lib/Target/AMDGPU/AMDIL.h
@ -0,0 +1,251 @@
 //===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file contains the entry points for global functions defined in the LLVM
 // AMDIL back-end.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDIL_H_
 #define AMDIL_H_
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Target/TargetMachine.h"
 #define AMDIL_MAJOR_VERSION 2
 #define AMDIL_MINOR_VERSION 0
 #define AMDIL_REVISION_NUMBER 74
 #define ARENA_SEGMENT_RESERVED_UAVS 12
 #define DEFAULT_ARENA_UAV_ID 8
 #define DEFAULT_RAW_UAV_ID 7
 #define GLOBAL_RETURN_RAW_UAV_ID 11
 #define HW_MAX_NUM_CB 8
 #define MAX_NUM_UNIQUE_UAVS 8
 #define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
 #define OPENCL_MAX_READ_IMAGES 128
 #define OPENCL_MAX_WRITE_IMAGES 8
 #define OPENCL_MAX_SAMPLERS 16
 // The default LDS, GDS and scratch IDs below can never be zero, as zero is
 // the ID that is used to assert against.
 #define DEFAULT_LDS_ID     1
 #define DEFAULT_GDS_ID     1
 #define DEFAULT_SCRATCH_ID 1
 #define DEFAULT_VEC_SLOTS  8
 // SC->CAL version matchings.
 #define CAL_VERSION_SC_150               1700
 #define CAL_VERSION_SC_149               1700
 #define CAL_VERSION_SC_148               1525
 #define CAL_VERSION_SC_147               1525
 #define CAL_VERSION_SC_146               1525
 #define CAL_VERSION_SC_145               1451
 #define CAL_VERSION_SC_144               1451
 #define CAL_VERSION_SC_143               1441
 #define CAL_VERSION_SC_142               1441
 #define CAL_VERSION_SC_141               1420
 #define CAL_VERSION_SC_140               1400
 #define CAL_VERSION_SC_139               1387
 #define CAL_VERSION_SC_138               1387
 #define CAL_APPEND_BUFFER_SUPPORT        1340
 #define CAL_VERSION_SC_137               1331
 #define CAL_VERSION_SC_136                982
 #define CAL_VERSION_SC_135                950
 #define CAL_VERSION_GLOBAL_RETURN_BUFFER  990
 #define OCL_DEVICE_RV710        0x0001
 #define OCL_DEVICE_RV730        0x0002
 #define OCL_DEVICE_RV770        0x0004
 #define OCL_DEVICE_CEDAR        0x0008
 #define OCL_DEVICE_REDWOOD      0x0010
 #define OCL_DEVICE_JUNIPER      0x0020
 #define OCL_DEVICE_CYPRESS      0x0040
 #define OCL_DEVICE_CAICOS       0x0080
 #define OCL_DEVICE_TURKS        0x0100
 #define OCL_DEVICE_BARTS        0x0200
 #define OCL_DEVICE_CAYMAN       0x0400
 #define OCL_DEVICE_ALL          0x3FFF
 /// The number of function ID's that are reserved for 
 /// internal compiler usage.
 const unsigned int RESERVED_FUNCS = 1024;
 #define AMDIL_OPT_LEVEL_DECL
 #define  AMDIL_OPT_LEVEL_VAR
 #define AMDIL_OPT_LEVEL_VAR_NO_COMMA
 namespace llvm {
 class AMDILInstrPrinter;
 class FunctionPass;
 class MCAsmInfo;
 class raw_ostream;
 class Target;
 class TargetMachine;
 // AMDIL_OPT_LEVEL_DECL is defined empty above, so every factory below takes
 // only the TargetMachine reference.
 /// Instruction selection passes.
 FunctionPass*
  createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
 FunctionPass*
  createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
 /// Pre emit passes.
 FunctionPass*
  createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
 FunctionPass*
  createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
 // Target descriptor objects, defined elsewhere.
 extern Target TheAMDILTarget;
 extern Target TheAMDGPUTarget;
 } // end namespace llvm;
 #define GET_REGINFO_ENUM
 #include "AMDGPUGenRegisterInfo.inc"
 #define GET_INSTRINFO_ENUM
 #include "AMDGPUGenInstrInfo.inc"
 /// Include device information enumerations
 #include "AMDILDeviceInfo.h"
 namespace llvm {
 /// OpenCL uses address spaces to differentiate between
 /// various memory regions on the hardware. On the CPU
 /// all of the address spaces point to the same memory,
 /// however on the GPU, each address space points to
 /// a separate piece of memory that is unique from other
 /// memory locations.
 namespace AMDILAS {
 // Numeric IDs of the address spaces. LAST_ADDRESS is one past the highest
 // real address space and therefore equals the number of entries.
 enum AddressSpaces {
  PRIVATE_ADDRESS  = 0, // Address space for private memory.
  GLOBAL_ADDRESS   = 1, // Address space for global memory (RAT0, VTX0).
  CONSTANT_ADDRESS = 2, // Address space for constant memory.
  LOCAL_ADDRESS    = 3, // Address space for local memory.
  REGION_ADDRESS   = 4, // Address space for region memory.
  ADDRESS_NONE     = 5, // Address space for unknown memory.
  PARAM_D_ADDRESS  = 6, // Address space for directly addressable parameter memory (CONST0)
  PARAM_I_ADDRESS  = 7, // Address space for indirectly addressable parameter memory (VTX1)
  USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
  LAST_ADDRESS     = 9
 };
 // This union/struct combination is an easy way to read out the
 // exact bits that are needed.
 // The bitfields add up to 16 bits and are overlaid on u16all. The two
 // preprocessor branches declare the same fields in opposite order.
 typedef union ResourceRec {
  struct {
 #ifdef __BIG_ENDIAN__
    unsigned short isImage       : 1;  // Reserved for future use/llvm.
    unsigned short ResourceID    : 10; // The resource ID for the op.
    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
                                       // is a hardware instruction.
    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has a
                                       // conflict.
    unsigned short ByteStore     : 1;  // Flag to specify if the op is a byte
                                       // store op.
    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
                                       // pointer path.
    unsigned short CacheableRead : 1;  // Flag to specify if the read is
                                       // cacheable.
 #else
    unsigned short CacheableRead : 1;  // Flag to specify if the read is
                                       // cacheable.
    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
                                       // pointer path.
    unsigned short ByteStore     : 1;  // Flag to specify if the op is a byte
                                       // store op.
    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has
                                       // a conflict.
    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
                                       // is a hardware instruction.
    unsigned short ResourceID    : 10; // The resource ID for the op.
    unsigned short isImage       : 1;  // Reserved for future use.
 #endif
  } bits;
  unsigned short u16all; // All of the fields above viewed as one value.
 } InstrResEnc;
 } // namespace AMDILAS
 // Enums corresponding to AMDIL condition codes for IL.  These
 // values must be kept in sync with the ones in the .td file.
 namespace AMDILCC {
 // Prefix key, as far as the comments below establish it: D_ = double,
 // F_ = float, I_ = signed integer, U_ = unsigned integer.
 // NOTE(review): L_ / UL_ presumably denote signed / unsigned 64-bit integer
 // comparisons -- confirm against AMDILInstrInfo.td.
 enum CondCodes {
  // AMDIL specific condition codes. These correspond to the IL_CC_*
  // in AMDILInstrInfo.td and must be kept in the same order.
  IL_CC_D_EQ  =  0,   // DEQ instruction.
  IL_CC_D_GE  =  1,   // DGE instruction.
  IL_CC_D_LT  =  2,   // DLT instruction.
  IL_CC_D_NE  =  3,   // DNE instruction.
  IL_CC_F_EQ  =  4,   //  EQ instruction.
  IL_CC_F_GE  =  5,   //  GE instruction.
  IL_CC_F_LT  =  6,   //  LT instruction.
  IL_CC_F_NE  =  7,   //  NE instruction.
  IL_CC_I_EQ  =  8,   // IEQ instruction.
  IL_CC_I_GE  =  9,   // IGE instruction.
  IL_CC_I_LT  = 10,   // ILT instruction.
  IL_CC_I_NE  = 11,   // INE instruction.
  IL_CC_U_GE  = 12,   // UGE instruction.
  IL_CC_U_LT  = 13,   // ULT instruction.
  // Pseudo IL Comparison instructions here.
  IL_CC_F_GT  = 14,   //  GT instruction.
  IL_CC_U_GT  = 15,
  IL_CC_I_GT  = 16,
  IL_CC_D_GT  = 17,
  IL_CC_F_LE  = 18,   //  LE instruction
  IL_CC_U_LE  = 19,
  IL_CC_I_LE  = 20,
  IL_CC_D_LE  = 21,
  IL_CC_F_UNE = 22,
  IL_CC_F_UEQ = 23,
  IL_CC_F_ULT = 24,
  IL_CC_F_UGT = 25,
  IL_CC_F_ULE = 26,
  IL_CC_F_UGE = 27,
  IL_CC_F_ONE = 28,
  IL_CC_F_OEQ = 29,
  IL_CC_F_OLT = 30,
  IL_CC_F_OGT = 31,
  IL_CC_F_OLE = 32,
  IL_CC_F_OGE = 33,
  IL_CC_D_UNE = 34,
  IL_CC_D_UEQ = 35,
  IL_CC_D_ULT = 36,
  IL_CC_D_UGT = 37,
  IL_CC_D_ULE = 38,
  IL_CC_D_UGE = 39,
  IL_CC_D_ONE = 40,
  IL_CC_D_OEQ = 41,
  IL_CC_D_OLT = 42,
  IL_CC_D_OGT = 43,
  IL_CC_D_OLE = 44,
  IL_CC_D_OGE = 45,
  IL_CC_U_EQ  = 46,
  IL_CC_U_NE  = 47,
  IL_CC_F_O   = 48,
  IL_CC_D_O   = 49,
  IL_CC_F_UO  = 50,
  IL_CC_D_UO  = 51,
  IL_CC_L_LE  = 52,
  IL_CC_L_GE  = 53,
  IL_CC_L_EQ  = 54,
  IL_CC_L_NE  = 55,
  IL_CC_L_LT  = 56,
  IL_CC_L_GT  = 57,
  IL_CC_UL_LE = 58,
  IL_CC_UL_GE = 59,
  IL_CC_UL_EQ = 60,
  IL_CC_UL_NE = 61,
  IL_CC_UL_LT = 62,
  IL_CC_UL_GT = 63,
  COND_ERROR  = 64    // One past the last valid condition code.
 };
 } // end namespace AMDILCC
 } // end namespace llvm
 #endif // AMDIL_H_
--- a/lib/Target/AMDGPU/AMDIL7XXDevice.cpp
+++ b/lib/Target/AMDGPU/AMDIL7XXDevice.cpp
@ -0,0 +1,128 @@
 //===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDIL7XXDevice.h"
 #include "AMDILDevice.h"
 using namespace llvm;
 // Sets the 7XX capability bits and derives the OCL device flag from the
 // subtarget's device name.
 AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
 {
  // Called from a constructor, so this always runs AMDIL7XXDevice::setCaps()
  // even when a derived device is being built.
  setCaps();
  std::string deviceName = mSTM->getDeviceName();
  // Any name other than "rv710" or "rv730" is treated as an rv770.
  mDeviceFlag = (deviceName == "rv710") ? OCL_DEVICE_RV710
              : (deviceName == "rv730") ? OCL_DEVICE_RV730
                                        : OCL_DEVICE_RV770;
 }
 // Nothing to release in this class.
 AMDIL7XXDevice::~AMDIL7XXDevice()
 {
 }
 // Sets the LocalMem bit in the software capability set (mSWBits).
 void AMDIL7XXDevice::setCaps()
 {
  mSWBits.set(AMDILDeviceInfo::LocalMem);
 }
 // Returns MAX_LDS_SIZE_700 when local memory is used in hardware, and 0
 // otherwise.
 size_t AMDIL7XXDevice::getMaxLDSSize() const
 {
  if (!usesHardware(AMDILDeviceInfo::LocalMem)) {
    return 0;
  }
  return MAX_LDS_SIZE_700;
 }
 // The generic 7XX device reports the half wavefront size; AMDIL770Device and
 // AMDIL710Device override this.
 size_t AMDIL7XXDevice::getWavefrontSize() const
 {
  return AMDILDevice::HalfWavefrontSize;
 }
 // Every 7XX device reports the HD4XXX generation.
 uint32_t AMDIL7XXDevice::getGeneration() const
 {
  return AMDILDeviceInfo::HD4XXX;
 }
 // Maps a resource kind (DeviceID) to the resource ID used on 7XX devices.
 // Returns 0 for every kind except LDS, scratch and GDS, which return their
 // DEFAULT_*_ID when the matching capability is used in hardware.
 uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
 {
  switch (DeviceID) {
  default:
    // Unknown kinds trip the assert in debug builds and return 0 otherwise.
    assert(0 && "ID type passed in is unknown!");
    break;
  case GLOBAL_ID:
  case CONSTANT_ID:
  case RAW_UAV_ID:
  case ARENA_UAV_ID:
    // Known kinds for which this device always reports ID 0.
    break;
  case LDS_ID:
    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
      return DEFAULT_LDS_ID;
    }
    break;
  case SCRATCH_ID:
    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
      return DEFAULT_SCRATCH_ID;
    }
    break;
  case GDS_ID:
    // Asserts in debug builds; when asserts are compiled out the RegionMem
    // check below still runs.
    assert(0 && "GDS UAV ID is not supported on this chip");
    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
      return DEFAULT_GDS_ID;
    }
    break;
  };
  return 0;
 }
 // 7XX devices report a single UAV.
 uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
 {
  return 1;
 }
 // The base constructor has already applied the generic 7XX capabilities;
 // this call adds the 770-specific ones from AMDIL770Device::setCaps().
 AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
 {
  setCaps();
 }
 // Nothing to release in this class.
 AMDIL770Device::~AMDIL770Device()
 {
 }
 // Applies the RV770-specific capability bits on top of the base 7XX ones.
 void AMDIL770Device::setCaps()
 {
  // Double ops are enabled in hardware, with FMA in software, only when the
  // subtarget reports the DoubleOps override.
  if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
    mSWBits.set(AMDILDeviceInfo::FMA);
    mHWBits.set(AMDILDeviceInfo::DoubleOps);
  }
  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
  // Long ops are moved from the hardware set to the software set.
  mHWBits.reset(AMDILDeviceInfo::LongOps);
  mSWBits.set(AMDILDeviceInfo::LongOps);
  mSWBits.set(AMDILDeviceInfo::LocalMem);
 }
 // The RV770 reports the full wavefront size.
 size_t AMDIL770Device::getWavefrontSize() const
 {
  return AMDILDevice::WavefrontSize;
 }
 // All setup is done by the AMDIL7XXDevice constructor.
 AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
 {
 }
 // Nothing to release in this class.
 AMDIL710Device::~AMDIL710Device()
 {
 }
 // The RV710 reports the quarter wavefront size.
 size_t AMDIL710Device::getWavefrontSize() const
 {
  return AMDILDevice::QuarterWavefrontSize;
 }
--- a/lib/Target/AMDGPU/AMDIL7XXDevice.h
+++ b/lib/Target/AMDGPU/AMDIL7XXDevice.h
@ -0,0 +1,71 @@
 //==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface for the subtarget data classes.
 //
 //===----------------------------------------------------------------------===//
 // This file will define the interface that each generation needs to
 // implement in order to correctly answer queries on the capabilities of the
 // specific hardware.
 //===----------------------------------------------------------------------===//
 #ifndef _AMDIL7XXDEVICEIMPL_H_
 #define _AMDIL7XXDEVICEIMPL_H_
 #include "AMDILDevice.h"
 #include "AMDILSubtarget.h"
 namespace llvm {
 class AMDILSubtarget;
 //===----------------------------------------------------------------------===//
 // 7XX generation of devices and their respective sub classes
 //===----------------------------------------------------------------------===//
 // The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
 // devices are derived from this class. The AMDIL7XX device will only
 // support the minimal features that are required to be considered OpenCL 1.0
 // compliant and nothing more.
 class AMDIL7XXDevice : public AMDILDevice {
 public:
  AMDIL7XXDevice(AMDILSubtarget *ST);
  virtual ~AMDIL7XXDevice();
  // Device queries; the definitions are in AMDIL7XXDevice.cpp.
  virtual size_t getMaxLDSSize() const;
  virtual size_t getWavefrontSize() const;
  virtual uint32_t getGeneration() const;
  virtual uint32_t getResourceID(uint32_t DeviceID) const;
  virtual uint32_t getMaxNumUAVs() const;
 protected:
  // Sets the capability bits for this device; called from the constructor.
  virtual void setCaps();
 }; // AMDIL7XXDevice
 // The AMDIL770Device class represents the RV770 chip and its
 // derivative cards. The difference between this device and the base
 // class is that this device adds support for double precision
 // and has a larger wavefront size.
 class AMDIL770Device : public AMDIL7XXDevice {
 public:
  AMDIL770Device(AMDILSubtarget *ST);
  virtual ~AMDIL770Device();
  // Overrides the base class value.
  virtual size_t getWavefrontSize() const;
 private:
  // Sets the 770-specific capability bits; called from the constructor.
  virtual void setCaps();
 }; // AMDIL770Device
 // The AMDIL710Device class derives from the 7XX base class, but this
 // class is a smaller derivative, so we need to overload some of the
 // functions in order to correctly specify this information.
 class AMDIL710Device : public AMDIL7XXDevice {
 public:
  AMDIL710Device(AMDILSubtarget *ST);
  virtual ~AMDIL710Device();
  // Overrides the base class value.
  virtual size_t getWavefrontSize() const;
 }; // AMDIL710Device
 } // namespace llvm
 #endif // _AMDIL7XXDEVICEIMPL_H_
--- a/lib/Target/AMDGPU/AMDILAlgorithms.tpp
+++ b/lib/Target/AMDGPU/AMDILAlgorithms.tpp
@ -0,0 +1,93 @@
 //===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file provides template algorithms that extend the STL algorithms and
 // are useful for the AMDIL backend.
 //
 //===----------------------------------------------------------------------===//
 // Calls F(*It, Second) for every position It in [First, Last). This mirrors
 // the STL for_each, except that an extra argument is handed to every call by
 // reference. Whatever F returns is ignored here. The function object is
 // taken by value and that local copy is returned once the range is exhausted.
 template<class InputIterator, class Function, typename Arg>
 Function binaryForEach(InputIterator First, InputIterator Last, Function F,
                        Arg &Second)
 {
  while (First != Last) {
    F(*First, Second);
    ++First;
  }
  return F;
 }
 // Like binaryForEach, but the result of F is inspected: when F(*It, Second)
 // returns true the iterator is stepped back one position before the regular
 // increment, so F is invoked again at the same position on the next pass.
 // NOTE(review): presumably F returns true after removing the current element;
 // stepping back from the first position, or from an iterator that F has
 // invalidated, is not guarded against here.
 template<class InputIterator, class Function, typename Arg>
 Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
                            Arg &Second)
 {
  while (First != Last) {
    const bool StepBack = F(*First, Second);
    if (StepBack)
      --First;
    ++First;
  }
  return F;
 }
 // Two-level version of binaryForEach: every element of [First, Last) is
 // itself a range (it must provide begin() and end()), and binaryForEach is
 // run over each of those inner ranges with F and Second. F is passed to
 // binaryForEach by value for every inner range, and the value returned here
 // is this function's own copy of F.
 template<class InputIterator, class Function, typename Arg>
 Function binaryNestedForEach(InputIterator First, InputIterator Last,
                              Function F, Arg &Second)
 {
  while (First != Last) {
    binaryForEach(First->begin(), First->end(), F, Second);
    ++First;
  }
  return F;
 }
 // Two-level version of safeBinaryForEach: every element of [First, Last) is
 // itself a range (it must provide begin() and end()), and safeBinaryForEach
 // is run over each of those inner ranges with F and Second. The value
 // returned is this function's own copy of F.
 template<class InputIterator, class Function, typename Arg>
 Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
                                  Function F, Arg &Second)
 {
  while (First != Last) {
    safeBinaryForEach(First->begin(), First->end(), F, Second);
    ++First;
  }
  return F;
 }
 // Unlike the STL for_each, the 'safe' variants hand F a pointer to the loop
 // iterator itself rather than the element it refers to. This lets F adjust
 // the iterator, e.g. to cope with iterators it has invalidated. The iterator
 // is still incremented once after every call.
 template<class InputIterator, class Function>
 Function safeForEach(InputIterator First, InputIterator Last, Function F)
 {
  while (First != Last) {
    F(&First);
    ++First;
  }
  return F;
 }
 // Two-level loop that calls F with a pointer to the inner iterator. Every
 // element of [First, Last) must provide begin() and end(); the inner end is
 // read once per outer element. When F returns true the inner iterator is left
 // exactly as F set it; otherwise it is advanced by one here. S is not used in
 // the body; it only lets the inner iterator type be deduced from the call.
 template<class InputIterator, class SecondIterator, class Function>
 Function safeNestedForEach(InputIterator First, InputIterator Last,
                               SecondIterator S, Function F)
 {
  while (First != Last) {
    SecondIterator Inner = First->begin();
    SecondIterator InnerEnd = First->end();
    while (Inner != InnerEnd) {
      if (F(&Inner))
        continue; // F repositioned the iterator itself.
      ++Inner;
    }
    ++First;
  }
  return F;
 }
--- a/lib/Target/AMDGPU/AMDILBase.td
+++ b/lib/Target/AMDGPU/AMDILBase.td
@ -0,0 +1,113 @@
 //===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 // Target-independent interfaces which we are implementing
 //===----------------------------------------------------------------------===//
 include "llvm/Target/Target.td"
 // Dummy Instruction itineraries for pseudo instructions
 // ALU_NULL is a placeholder functional unit and NullALU a placeholder
 // itinerary class; both are bare records with no fields set here.
 def ALU_NULL : FuncUnit;
 def NullALU : InstrItinClass;
 //===----------------------------------------------------------------------===//
 // AMDIL Subtarget features.
 //===----------------------------------------------------------------------===//
 // Each SubtargetFeature below is given four strings, in this order: the
 // feature name, the subtarget attribute it assigns, the value assigned to
 // that attribute, and a human-readable description. Most features assign
 // "true" to a CapsOverride[] slot indexed by an AMDILDeviceInfo capability.

 // fp64: overrides the DoubleOps capability.
 def FeatureFP64     : SubtargetFeature<"fp64",
        "CapsOverride[AMDILDeviceInfo::DoubleOps]",
        "true",
        "Enable 64bit double precision operations">;
 // byte_addressable_store: overrides the ByteStores capability.
 def FeatureByteAddress    : SubtargetFeature<"byte_addressable_store",
        "CapsOverride[AMDILDeviceInfo::ByteStores]",
        "true",
        "Enable byte addressable stores">;
 // barrier_detect: overrides the BarrierDetect capability.
 def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
        "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
        "true",
        "Enable duplicate barrier detection(HD5XXX or later).">;
 // images: overrides the Images capability.
 def FeatureImages : SubtargetFeature<"images",
        "CapsOverride[AMDILDeviceInfo::Images]",
        "true",
        "Enable image functions">;
 // multi_uav: overrides the MultiUAV capability.
 def FeatureMultiUAV : SubtargetFeature<"multi_uav",
        "CapsOverride[AMDILDeviceInfo::MultiUAV]",
        "true",
        "Generate multiple UAV code(HD5XXX family or later)">;
 // macrodb: overrides the MacroDB capability.
 def FeatureMacroDB : SubtargetFeature<"macrodb",
        "CapsOverride[AMDILDeviceInfo::MacroDB]",
        "true",
        "Use internal macrodb, instead of macrodb in driver">;
 // noalias: overrides the NoAlias capability.
 def FeatureNoAlias : SubtargetFeature<"noalias",
        "CapsOverride[AMDILDeviceInfo::NoAlias]",
        "true",
        "assert that all kernel argument pointers are not aliased">;
 // no-inline: overrides the NoInline capability.
 def FeatureNoInline : SubtargetFeature<"no-inline",
        "CapsOverride[AMDILDeviceInfo::NoInline]",
        "true",
        "specify whether to not inline functions">;
 // 64BitPtr: assigns the mIs64bit attribute.
 // NOTE(review): the value assigned when this feature is enabled is "false",
 // unlike the "true" used by the capability features above — confirm that
 // this is intended and that enabling the feature has the desired effect.
 def Feature64BitPtr : SubtargetFeature<"64BitPtr",
        "mIs64bit",
        "false",
        "Specify if 64bit addressing should be used.">;
 // 64on32BitPtr: assigns the mIs32on64bit attribute.
 // NOTE(review): the record and attribute names say "32on64" while the feature
 // string says "64on32"; the assigned value is also "false" — confirm both.
 def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
        "mIs32on64bit",
        "false",
        "Specify if 64bit sized pointers with 32bit addressing should be used.">;
 // debug: overrides the Debug capability.
 def FeatureDebug : SubtargetFeature<"debug",
        "CapsOverride[AMDILDeviceInfo::Debug]",
        "true",
        "Debug mode is enabled, so disable hardware accelerated address spaces.">;
 // DumpCode: assigns "true" to the mDumpCode attribute.
 def FeatureDumpCode : SubtargetFeature <"DumpCode",
        "mDumpCode",
        "true",
        "Dump MachineInstrs in the CodeEmitter">;
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
 //===----------------------------------------------------------------------===//
 include "AMDILRegisterInfo.td"
 include "AMDILCallingConv.td"
 include "AMDILInstrInfo.td"
 // Instruction-set record for the AMDIL target; it sets no fields of its own
 // and is referenced by the AMDIL Target definition in this file.
 def AMDILInstrInfo : InstrInfo {}
 //===----------------------------------------------------------------------===//
 // AMDIL processors supported.
 //===----------------------------------------------------------------------===//
 //include "Processors.td"
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
 //===----------------------------------------------------------------------===//
 // Assembly writer description: names the writer class "AsmPrinter" and uses
 // variant 0.
 def AMDILAsmWriter : AsmWriter {
    string AsmWriterClassName = "AsmPrinter";
    int Variant = 0;
 }
 // Assembly parser description: names the parser class "AsmParser", uses
 // variant 0, treats ';' as the comment delimiter and "r" as the register
 // prefix.
 def AMDILAsmParser : AsmParser {
    string AsmParserClassName = "AsmParser";
    int Variant = 0;
    string CommentDelimiter = ";";
    string RegisterPrefix = "r";
 }
 // The AMDIL target record: ties together the instruction set, the assembly
 // writer and the assembly parser defined earlier in this file.
 def AMDIL : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDILInstrInfo;
  // Assembly writer(s) and parser(s) used by this target.
  let AssemblyWriters = [AMDILAsmWriter];
  let AssemblyParsers = [AMDILAsmParser];
 }
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
--- a/lib/Target/AMDGPU/AMDILCallingConv.td
+++ b/lib/Target/AMDGPU/AMDILCallingConv.td
@ -0,0 +1,42 @@
 //===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This describes the calling conventions for the AMDIL architectures.
 //
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // Return Value Calling Conventions
 //===----------------------------------------------------------------------===//
 // AMDIL 32-bit C return-value convention.
 def RetCC_AMDIL32 : CallingConv<[
 // Since IL has no return values, all values can be emulated on the stack.
 // The stack can then be mapped to a number of sequential virtual registers
 // in IL.
 // i32 and f32 values are first assigned to the registers R1..R20; anything
 // not handled by that rule falls through to CCAssignToStack<16, 16>.
 // NOTE(review): this comment previously described stack slots with 16-byte
 // alignment but a size of 4 bytes, whereas both arguments of CCAssignToStack
 // below are 16 — confirm which is intended.
 CCIfType<[i32, f32], CCAssignToReg<
 [
 R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
 ]> >, CCAssignToStack<16, 16>]>;
 // AMDIL 32-bit C Calling convention.
 def CC_AMDIL32 : CallingConv<[
  // Since IL has parameter values, all values can be emulated on the stack.
 // The stack can then be mapped to a number of sequential virtual registers
 // in IL.
 // i32 and f32 arguments are first assigned to the registers R1..R20; anything
 // not handled by that rule falls through to CCAssignToStack<16, 16>.
 // NOTE(review): this comment previously described stack slots with 16-byte
 // alignment but a size of 4 bytes, whereas both arguments of CCAssignToStack
 // below are 16 — confirm which is intended.
 CCIfType<[i32, f32], CCAssignToReg<
 [R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
 ]> >, CCAssignToStack<16, 16>]>;
--- a/lib/Target/AMDGPU/AMDILCodeEmitter.h
+++ b/lib/Target/AMDGPU/AMDILCodeEmitter.h
@ -0,0 +1,48 @@
 //===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // CodeEmitter interface for R600 and SI codegen.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDILCODEEMITTER_H
 #define AMDILCODEEMITTER_H
 namespace llvm {
  // Forward declarations so this header does not depend on include order.
  class MachineInstr;
  class MachineOperand;

  // Interface shared by the code emitters. getBinaryCodeForInstr() is only
  // declared here; the virtual hooks below have trivial default bodies that
  // concrete emitters may override.
  class AMDILCodeEmitter {
  public:
    // The class has virtual functions, so give it a virtual destructor to make
    // destruction through a pointer to this interface well defined.
    virtual ~AMDILCodeEmitter() {}

    // Returns the binary encoding of MI (defined outside this header).
    uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;

    // Returns the encoded value of operand MO of MI. Default: 0.
    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
                                   const MachineOperand &MO) const { return 0; }

    // Encoding hook for operand OpNo of MI. Default: 0.
    virtual unsigned GPR4AlignEncode(const MachineInstr  &MI,
                                     unsigned OpNo) const {
      return 0;
    }

    // Encoding hook for operand OpNo of MI. Default: 0.
    virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
                                     unsigned OpNo) const {
      return 0;
    }

    // Post-processing hook for an already encoded Value. Default: returns
    // Value unchanged.
    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
                                   uint64_t Value) const {
      return Value;
    }

    // Encoding hook for operand OpNo of MI. Default: 0.
    virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
                                      unsigned OpNo) const {
      return 0;
    }

    // Encoding hook for operand OpNo of MI. Default: 0.
    virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
                                                                     const {
      return 0;
    }
  };
 } // End namespace llvm
 #endif // AMDILCODEEMITTER_H
--- a/lib/Target/AMDGPU/AMDILDevice.cpp
+++ b/lib/Target/AMDGPU/AMDILDevice.cpp
@ -0,0 +1,137 @@
 //===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDILDevice.h"
 #include "AMDILSubtarget.h"
 using namespace llvm;
 // Default implementation for all of the classes.
 // Sizes both capability bit vectors to MaxNumberCapabilities and then fills
 // them in via setCaps(). Because the call is made from this constructor, it
 // runs AMDILDevice::setCaps and not an override in a derived class.
 AMDILDevice::AMDILDevice(AMDILSubtarget *ST)
  : mSTM(ST), mDeviceFlag(OCL_DEVICE_ALL) {
  const unsigned NumCaps = AMDILDeviceInfo::MaxNumberCapabilities;
  mHWBits.resize(NumCaps);
  mSWBits.resize(NumCaps);
  setCaps();
 }
 // Empties both capability bit vectors.
 AMDILDevice::~AMDILDevice() {
  mHWBits.clear();
  mSWBits.clear();
 }
 // The base device reports a GDS size of 0 bytes.
 size_t AMDILDevice::getMaxGDSSize() const {
  const size_t NoGDS = 0;
  return NoGDS;
 }
 // Returns the device flag stored by the constructor.
 uint32_t AMDILDevice::getDeviceFlag() const {
  return mDeviceFlag;
 }
 // Number of hardware constant buffers: HW_MAX_NUM_CB when constant memory
 // runs in hardware mode, otherwise 0.
 size_t AMDILDevice::getMaxNumCBs() const {
  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
    return 0;
  return HW_MAX_NUM_CB;
 }
 // Size of one hardware constant buffer: MAX_CB_SIZE when constant memory
 // runs in hardware mode, otherwise 0.
 size_t AMDILDevice::getMaxCBSize() const {
  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
    return 0;
  return MAX_CB_SIZE;
 }
 // The base device reports a scratch size of 65536.
 size_t AMDILDevice::getMaxScratchSize() const {
  const size_t ScratchSize = 65536;
  return ScratchSize;
 }
 // The base device reports a stack alignment of 16.
 uint32_t AMDILDevice::getStackAlignment() const {
  const uint32_t Alignment = 16;
  return Alignment;
 }
 void AMDILDevice::setCaps()
 {
  mSWBits.set(AMDILDeviceInfo::HalfOps);
  mSWBits.set(AMDILDeviceInfo::ByteOps);
  mSWBits.set(AMDILDeviceInfo::ShortOps);
  mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
  if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
    mSWBits.set(AMDILDeviceInfo::NoInline);
  }
  if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
    mSWBits.set(AMDILDeviceInfo::MacroDB);
  }
  if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
    mSWBits.set(AMDILDeviceInfo::ConstantMem);
  } else {
    mHWBits.set(AMDILDeviceInfo::ConstantMem);
  }
  if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
    mSWBits.set(AMDILDeviceInfo::PrivateMem);
  } else {
    mHWBits.set(AMDILDeviceInfo::PrivateMem);
  }
  if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
    mSWBits.set(AMDILDeviceInfo::BarrierDetect);
  }
  mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
  mSWBits.set(AMDILDeviceInfo::LongOps);
 }
 // Maps a capability to its execution mode: Hardware when its bit is set in
 // mHWBits, Software when it is set in mSWBits, and Unsupported when neither
 // is set. The asserts guard against a capability being set in both vectors.
 AMDILDeviceInfo::ExecutionMode
 AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const {
  AMDILDeviceInfo::ExecutionMode Result = AMDILDeviceInfo::Unsupported;
  if (mHWBits[Caps]) {
    assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
    Result = AMDILDeviceInfo::Hardware;
  } else if (mSWBits[Caps]) {
    assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
    Result = AMDILDeviceInfo::Software;
  }
  return Result;
 }
 // True when the capability runs in either hardware or software mode.
 bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode Exec = getExecutionMode(Mode);
  return Exec != AMDILDeviceInfo::Unsupported;
 }
 // True when the capability runs in hardware mode.
 bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode Exec = getExecutionMode(Mode);
  return Exec == AMDILDeviceInfo::Hardware;
 }
 // True when the capability runs in software mode.
 bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode Exec = getExecutionMode(Mode);
  return Exec == AMDILDeviceInfo::Software;
 }
 // Returns the data layout string of the base device. The literal is split
 // across several lines only for readability; the pieces are concatenated by
 // the compiler into one string.
 std::string AMDILDevice::getDataLayout() const {
  const char *Layout =
      "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
      "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
      "-n8:16:32:64";
  return std::string(Layout);
 }
--- a/lib/Target/AMDGPU/AMDILDevice.h
+++ b/lib/Target/AMDGPU/AMDILDevice.h
@ -0,0 +1,116 @@
 //===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface for the subtarget data classes.
 //
 //===----------------------------------------------------------------------===//
 // This file will define the interface that each generation needs to
 // implement in order to correctly answer queries on the capabilities of the
 // specific hardware.
 //===----------------------------------------------------------------------===//
 #ifndef _AMDILDEVICEIMPL_H_
 #define _AMDILDEVICEIMPL_H_
 #include "AMDIL.h"
 #include "llvm/ADT/BitVector.h"
 namespace llvm {
  class AMDILSubtarget;
  class MCStreamer;
 //===----------------------------------------------------------------------===//
 // Interface for data that is specific to a single device
 //===----------------------------------------------------------------------===//
 // Base class describing one device. It stores the capability bit vectors
 // and declares the queries that each hardware generation must implement.
 class AMDILDevice {
 public:
  // Builds the device description for the subtarget ST.
  AMDILDevice(AMDILSubtarget *ST);
  virtual ~AMDILDevice();
  // Enum values for the various memory types.
  // NOTE(review): as written, IO_TYPE_IDS is a data member of this unnamed
  // enum type rather than the name of the enum — confirm that is intended.
  enum {
    RAW_UAV_ID   = 0,
    ARENA_UAV_ID = 1,
    LDS_ID       = 2,
    GDS_ID       = 3,
    SCRATCH_ID   = 4,
    CONSTANT_ID  = 5,
    GLOBAL_ID    = 6,
    MAX_IDS      = 7
  } IO_TYPE_IDS;
  // Returns the max LDS size that the hardware supports.  Size is in
  // bytes.
  virtual size_t getMaxLDSSize() const = 0;
  // Returns the max GDS size that the hardware supports if the GDS is
  // supported by the hardware.  Size is in bytes.
  virtual size_t getMaxGDSSize() const;
  // Returns the max number of hardware constant address spaces that
  // are supported by this device.
  virtual size_t getMaxNumCBs() const;
  // Returns the max number of bytes a single hardware constant buffer
  // can support.  Size is in bytes.
  virtual size_t getMaxCBSize() const;
  // Returns the max number of bytes allowed by the hardware scratch
  // buffer.  Size is in bytes.
  virtual size_t getMaxScratchSize() const;
  // Get the flag that corresponds to the device.
  virtual uint32_t getDeviceFlag() const;
  // Returns the number of work-items that exist in a single hardware
  // wavefront.
  virtual size_t getWavefrontSize() const = 0;
  // Get the generational name of this specific device.
  virtual uint32_t getGeneration() const = 0;
  // Get the stack alignment of this specific device.
  virtual uint32_t getStackAlignment() const;
  // Get the resource ID for this specific device.
  virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
  // Get the max number of UAV's for this device.
  virtual uint32_t getMaxNumUAVs() const = 0;
  // API utilizing more detailed capabilities of each family of
  // cards. If a capability is supported, then either usesHardware or
  // usesSoftware returned true.  If usesHardware returned true, then
  // usesSoftware must return false for the same capability.  Hardware
  // execution means that the feature is done natively by the hardware
  // and is not emulated by the software.  Software execution means
  // that the feature could be done in the hardware, but there is
  // software that emulates it with possibly using the hardware for
  // support since the hardware does not fully comply with OpenCL
  // specs.
  bool isSupported(AMDILDeviceInfo::Caps Mode) const;
  bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
  bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
  // Returns the data layout string for this device.
  virtual std::string getDataLayout() const;
  // Size and wavefront constants shared by the device classes.
  static const unsigned int MAX_LDS_SIZE_700 = 16384;
  static const unsigned int MAX_LDS_SIZE_800 = 32768;
  static const unsigned int WavefrontSize = 64;
  static const unsigned int HalfWavefrontSize = 32;
  static const unsigned int QuarterWavefrontSize = 16;
 protected:
  // Fills mHWBits and mSWBits with the capabilities of this device.
  virtual void setCaps();
  // One bit per AMDILDeviceInfo::Caps value: set in mHWBits when the
  // capability runs in hardware, in mSWBits when it runs in software.
  llvm::BitVector mHWBits;
  llvm::BitVector mSWBits;
  // Subtarget this device belongs to; queried for capability overrides.
  AMDILSubtarget *mSTM;
  // Value returned by getDeviceFlag().
  uint32_t mDeviceFlag;
 private:
  // Maps a capability to Hardware, Software or Unsupported based on the
  // two bit vectors.
  AMDILDeviceInfo::ExecutionMode
  getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
 }; // AMDILDevice
 } // namespace llvm
 #endif // _AMDILDEVICEIMPL_H_
--- a/lib/Target/AMDGPU/AMDILDeviceInfo.cpp
+++ b/lib/Target/AMDGPU/AMDILDeviceInfo.cpp
@ -0,0 +1,93 @@
 //===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Function that creates DeviceInfo from a device name and other information.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDILDevices.h"
 #include "AMDILSubtarget.h"
 using namespace llvm;
 namespace llvm {
 namespace AMDILDeviceInfo {
 // Creates the device object that matches deviceName. Every result is
 // allocated with new. Names whose third character is '7' select one of the
 // 7XX devices based on the fourth character, "SI" selects the SI device, the
 // Evergreen/NI names select their respective devices, and any other name
 // falls back to the generic 7XX device. When DEBUG is set, all paths other
 // than the '7' names and "SI" assert that neither 64bit mode was requested.
 AMDILDevice*
 getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
 {
  // 7XX family: reading index 2 is safe even for short names because c_str()
  // is null terminated, and index 3 is only read when index 2 holds '7'.
  if (deviceName.c_str()[2] == '7') {
    const char Model = deviceName.c_str()[3];
    if (Model == '1')
      return new AMDIL710Device(ptr);
    if (Model == '7')
      return new AMDIL770Device(ptr);
    return new AMDIL7XXDevice(ptr);
  }

  // "SI" is the only remaining name that skips the pointer-size checks.
  if (deviceName == "SI")
    return new AMDILSIDevice(ptr);

 #if DEBUG
  assert(!is64bit && "This device does not support 64bit pointers!");
  assert(!is64on32bit && "This device does not support 64bit"
      " on 32bit pointers!");
 #endif

  if (deviceName == "cypress")
    return new AMDILCypressDevice(ptr);
  if (deviceName == "juniper")
    return new AMDILEvergreenDevice(ptr);
  if (deviceName == "redwood")
    return new AMDILRedwoodDevice(ptr);
  if (deviceName == "cedar")
    return new AMDILCedarDevice(ptr);
  if (deviceName == "barts" || deviceName == "turks" ||
      deviceName == "caicos")
    return new AMDILNIDevice(ptr);
  if (deviceName == "cayman")
    return new AMDILCaymanDevice(ptr);

  // Unknown names get the generic 7XX device.
  return new AMDIL7XXDevice(ptr);
 }
 } // End namespace AMDILDeviceInfo
 } // End namespace llvm
--- a/lib/Target/AMDGPU/AMDILDeviceInfo.h
+++ b/lib/Target/AMDGPU/AMDILDeviceInfo.h
@ -0,0 +1,89 @@
 //===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #ifndef _AMDILDEVICEINFO_H_
 #define _AMDILDEVICEINFO_H_
 #include <string>
 namespace llvm
 {
  class AMDILDevice;
  class AMDILSubtarget;
  namespace AMDILDeviceInfo
  {
    // Each capability can be executed using a hardware instruction,
    // emulated with a sequence of software instructions, or not
    // supported at all.
    enum ExecutionMode {
      Unsupported = 0, // Unsupported feature on the card(Default value)
      Software, // This is the execution mode that is set if the
      // feature is emulated in software
      Hardware  // This execution mode is set if the feature exists
        // natively in hardware
    };
    // Any changes to this need to have a corresponding update to the
    // twiki page GPUMetadataABI.
    // The enumerators are used as bit indices into bit vectors that are
    // sized with MaxNumberCapabilities; the first capability is 0x1, so
    // index 0 is not assigned to any capability.
    enum Caps {
      HalfOps          = 0x1,  // Half float is supported or not.
      DoubleOps        = 0x2,  // Double is supported or not.
      ByteOps          = 0x3,  // Byte(char) is supported or not.
      ShortOps         = 0x4,  // Short is supported or not.
      LongOps          = 0x5,  // Long is supported or not.
      Images           = 0x6,  // Images are supported or not.
      ByteStores       = 0x7,  // ByteStores available(!HD4XXX).
      ConstantMem      = 0x8,  // Constant/CB memory.
      LocalMem         = 0x9,  // Local/LDS memory.
      PrivateMem       = 0xA,  // Scratch/Private/Stack memory.
      RegionMem        = 0xB,  // OCL GDS Memory Extension.
      FMA              = 0xC,  // Use HW FMA or SW FMA.
      ArenaSegment     = 0xD,  // Use for Arena UAV per pointer 12-1023.
      MultiUAV         = 0xE,  // Use for UAV per Pointer 0-7.
      Reserved0        = 0xF,  // ReservedFlag
      NoAlias          = 0x10, // Cached loads.
      Signed24BitOps   = 0x11, // Peephole Optimization.
      // Debug mode implies that no hardware features or optimizations
      // are performed and that all memory accesses go through a single
      // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
      Debug            = 0x12, // Debug mode is enabled.
      CachedMem        = 0x13, // Cached mem is available or not.
      BarrierDetect    = 0x14, // Detect duplicate barriers.
      Reserved1        = 0x15, // Reserved flag
      ByteLDSOps       = 0x16, // Flag to specify if byte LDS ops are available.
      ArenaVectors     = 0x17, // Flag to specify if vector loads from arena work.
      TmrReg           = 0x18, // Flag to specify if Tmr register is supported.
      NoInline         = 0x19, // Flag to specify that no inlining should occur.
      MacroDB          = 0x1A, // Flag to specify that backend handles macrodb.
      HW64BitDivMod    = 0x1B, // Flag for backend to generate 64bit div/mod.
      ArenaUAV         = 0x1C, // Flag to specify that arena uav is supported.
      PrivateUAV       = 0x1D, // Flag to specify that private memory uses uav's.
      // If more capabilities are required, then
      // this number needs to be increased.
      // All capabilities must come before this
      // number.
      MaxNumberCapabilities = 0x20
    };
    // These have to be in order with the older generations
    // having the lower number enumerations.
    enum Generation {
      HD4XXX = 0, // 7XX based devices.
      HD5XXX, // Evergreen based devices.
      HD6XXX, // NI/Evergreen+ based devices.
      HD7XXX, // NOTE(review): presumably SI based devices — confirm.
      HDTEST, // Experimental feature testing device.
      HDNUMGEN // Count of the generations listed above.
    };
  // Creates the device object that matches the device name. The object is
  // allocated with new (presumably owned by the caller — confirm). Both
  // pointer-size flags default to false.
  AMDILDevice*
    getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
  } // namespace AMDILDeviceInfo
 } // namespace llvm
 #endif // _AMDILDEVICEINFO_H_
--- a/lib/Target/AMDGPU/AMDILDevices.h
+++ b/lib/Target/AMDGPU/AMDILDevices.h
@ -0,0 +1,19 @@
 //===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #ifndef __AMDIL_DEVICES_H_
 #define __AMDIL_DEVICES_H_
 // Include all of the device specific header files
 // This file is for Internal use only!
 #include "AMDIL7XXDevice.h"
 #include "AMDILDevice.h"
 #include "AMDILEvergreenDevice.h"
 #include "AMDILNIDevice.h"
 #include "AMDILSIDevice.h"
 #endif // __AMDIL_DEVICES_H_
--- a/lib/Target/AMDGPU/AMDILEnumeratedTypes.td
+++ b/lib/Target/AMDGPU/AMDILEnumeratedTypes.td
@ -0,0 +1,522 @@
 //===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 // ILEnumeratedTypes.td - The IL Enumerated Types
 //===--------------------------------------------------------------------===//
 // Section 5.1  IL Shader
 // ILShader - Record class for an IL shader kind; stores the kind as an
 // 8-bit Value.
 class ILShader<bits<8> val> {
    bits<8> Value = val;
 }
 // Table 5-1
 def IL_SHADER_PIXEL : ILShader<0>;
 def IL_SHADER_COMPUTE : ILShader<1>;
 // Section 5.2 IL RegType
 // ILRegType - Record class for an IL register type; stores the type as a
 // 6-bit Value.
 class ILRegType<bits<6> val> {
    bits<6> Value = val;
 }
 // Table 5-2
 def IL_REGTYPE_TEMP      : ILRegType<0>;
 def IL_REGTYPE_WINCOORD  : ILRegType<1>;
 def IL_REGTYPE_CONST_BUF : ILRegType<2>;
 def IL_REGTYPE_LITERAL   : ILRegType<3>;
 def IL_REGTYPE_ITEMP     : ILRegType<4>;
 def IL_REGTYPE_GLOBAL    : ILRegType<5>;
 // Section 5.3 IL Component Select
 // ILComponentSelect - Record class for a component selector; stores a 3-bit
 // Value together with the Text string associated with the selector.
 class ILComponentSelect<bits<3> val, string text> {
     bits<3> Value = val;
     string Text = text;
 }
 // Table 5-3
 def IL_COMPSEL_X : ILComponentSelect<0, "x">;
 def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
 def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
 def IL_COMPSEL_W : ILComponentSelect<3, "w">;
 def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
 def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
 def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
 def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
 // Section 5.4 IL Mod Dst Comp
 // ILModDstComp - Record class for a destination component modifier; stores
 // a 2-bit Value together with its Text string.
 class ILModDstComp<bits<2> val, string text> {
    bits<2> Value = val;
    string Text = text;
 }
 // Table 5-4
 // The four WRITE_* records below all carry Value 1 and differ only in Text.
 // NOTE(review): the record names mix upper case (WRITE_X) and lower case
 // (WRITE_y/z/w); presumably unintentional, but renaming would affect any
 // user of these records -- confirm before changing.
 def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
 def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
 def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
 def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
 def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
 def IL_MODCOMP_0       : ILModDstComp<2, "0">;
 def IL_MODCOMP_1       : ILModDstComp<3, "1">;
 // Section 5.5 IL Import Usage
 // ILImportUsage - Record class for an import usage; stores a 1-bit Value
 // together with the usage Text string.
 class ILImportUsage<bits<1> val, string usage> {
    bits<1> Value = val;
    string Text = usage;
 }
 // Table 5-5
 def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
 // Section 5.6 IL Shift Scale
 // ILShiftScale - Record class for a shift-scale modifier; stores a 4-bit
 // Value together with the modifier suffix in Text (empty for IL_SHIFT_NONE).
 class ILShiftScale<bits<4> val, string scale> {
    bits<4> Value = val;
    string Text = scale;
 }
 // Table 5-6
 def IL_SHIFT_NONE   : ILShiftScale<0, "">;
 def IL_SHIFT_X2     : ILShiftScale<1, "_x2">;
 def IL_SHIFT_X4     : ILShiftScale<2, "_x4">;
 def IL_SHIFT_X8     : ILShiftScale<3, "_x8">;
 def IL_SHIFT_D2     : ILShiftScale<4, "_d2">;
 def IL_SHIFT_D4     : ILShiftScale<5, "_d4">;
 def IL_SHIFT_D8     : ILShiftScale<6, "_d8">;
 // Section 5.7 IL Divide Component
 // ILDivComp - Record class for a divide-component modifier; stores a 3-bit
 // Value together with the modifier Text string.
 class ILDivComp<bits<3> val, string divcomp> {
    bits<3> Value = val;
    string Text = divcomp;
 }
 // Table 5-7
 def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
 def IL_DIVCOMP_Y    : ILDivComp<1, "_divcomp(y)">;
 def IL_DIVCOMP_Z    : ILDivComp<2, "_divcomp(z)">;
 def IL_DIVCOMP_W    : ILDivComp<3, "_divcomp(w)">;
 // The 'unknown' entry (value 4) is intentionally left disabled.
 //def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
 // Section 5.8 IL Relational Op
 // ILRelOp - Record class for a relational operator modifier; stores a 3-bit
 // Value together with the modifier Text string.
 class ILRelOp<bits<3> val, string op> {
    bits<3> Value = val;
    string Text = op;
 }
 // Table 5-8
 def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
 def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
 def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
 def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
 def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
 def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
 // Section 5.9 IL Zero Op
 // ILZeroOp - Record class for a zero-op modifier; stores a 3-bit Value
 // together with the modifier Text string.
 class ILZeroOp<bits<3> val, string behavior> {
    bits<3> Value = val;
    string Text = behavior;
 }
 // Table 5-9
 def IL_ZEROOP_FLTMAX    : ILZeroOp<0, "_zeroop(fltmax)">;
 def IL_ZEROOP_0         : ILZeroOp<1, "_zeroop(zero)">;
 def IL_ZEROOP_INFINITY  : ILZeroOp<2, "_zeroop(infinity)">;
 def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
 // Section 5.10 IL Cmp Value
 // ILCmpValue - Record class for a compare value; stores a 3-bit Value
 // together with the numeric constant spelled out in Text.
 class ILCmpValue<bits<3> val, string num> {
    bits<3> Value = val;
    string Text = num;
 }
 // Table 5-10
 def IL_CMPVAL_0_0     : ILCmpValue<0, "0.0">;
 def IL_CMPVAL_0_5     : ILCmpValue<1, "0.5">;
 def IL_CMPVAL_1_0     : ILCmpValue<2, "1.0">;
 def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
 def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
 // Section 5.11 IL Addressing
 // ILAddressing - Record class for an addressing mode; stores the mode as a
 // 3-bit Value.
 class ILAddressing<bits<3> val> {
    bits<3> Value = val;
 }
 // Table 5-11
 def IL_ADDR_ABSOLUTE     : ILAddressing<0>;
 def IL_ADDR_RELATIVE     : ILAddressing<1>;
 def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
 // Section 5.11 IL Element Format
 // ILElementFormat - Record class for an element format; stores the format
 // as a 5-bit Value.
 class ILElementFormat<bits<5> val> {
    bits<5> Value = val;
 }
 // Table 5-11
 def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
 def IL_ELEMENTFORMAT_SNORM   : ILElementFormat<1>;
 def IL_ELEMENTFORMAT_UNORM   : ILElementFormat<2>;
 def IL_ELEMENTFORMAT_SINT    : ILElementFormat<3>;
 def IL_ELEMENTFORMAT_UINT    : ILElementFormat<4>;
 def IL_ELEMENTFORMAT_FLOAT   : ILElementFormat<5>;
 def IL_ELEMENTFORMAT_SRGB    : ILElementFormat<6>;
 def IL_ELEMENTFORMAT_MIXED   : ILElementFormat<7>;
 // NOTE(review): presumably an end-of-list sentinel rather than a real
 // format -- confirm against users of this record.
 def IL_ELEMENTFORMAT_Last    : ILElementFormat<8>;
 // Section 5.12 IL Op Code
 // ILOpCode - Record class for an IL opcode; stores a 16-bit Value together
 // with the mnemonic string in Text. 'val' declares a default of -1, but the
 // defs visible in this file always pass both arguments explicitly.
 class ILOpCode<bits<16> val = -1, string cmd> {
    bits<16> Value = val;
    string Text = cmd;
 }
 // Table 5-12
 def IL_DCL_CONST_BUFFER         : ILOpCode<0, "dcl_cb">;
 def IL_DCL_INDEXED_TEMP_ARRAY   : ILOpCode<1, "dcl_index_temp_array">;
 def IL_DCL_INPUT                : ILOpCode<2, "dcl_input">;
 def IL_DCL_LITERAL              : ILOpCode<3, "dcl_literal">;
 def IL_DCL_OUTPUT               : ILOpCode<4, "dcl_output">;
 def IL_DCL_RESOURCE             : ILOpCode<5, "dcl_resource">;
 def IL_OP_ABS                   : ILOpCode<6, "abs">;
 def IL_OP_ADD                   : ILOpCode<7, "add">;
 def IL_OP_AND                   : ILOpCode<8, "iand">;
 def IL_OP_BREAK                 : ILOpCode<9, "break">;
 def IL_OP_BREAK_LOGICALNZ       : ILOpCode<10, "break_logicalnz">;
 def IL_OP_BREAK_LOGICALZ        : ILOpCode<11, "break_logicalz">;
 def IL_OP_BREAKC                : ILOpCode<12, "breakc">;
 def IL_OP_CALL                  : ILOpCode<13, "call">;
 def IL_OP_CALL_LOGICALNZ        : ILOpCode<14, "call_logicalnz">;
 def IL_OP_CALL_LOGICALZ         : ILOpCode<15, "call_logicalz">;
 def IL_OP_CASE                  : ILOpCode<16, "case">;
 def IL_OP_CLG                   : ILOpCode<17, "clg">;
 def IL_OP_CMOV                  : ILOpCode<18, "cmov">;
 def IL_OP_CMOV_LOGICAL          : ILOpCode<19, "cmov_logical">;
 def IL_OP_CMP                   : ILOpCode<20, "cmp">;
 def IL_OP_CONTINUE              : ILOpCode<21, "continue">;
 def IL_OP_CONTINUE_LOGICALNZ    : ILOpCode<22, "continue_logicalnz">;
 def IL_OP_CONTINUE_LOGICALZ     : ILOpCode<23, "continue_logicalz">;
 def IL_OP_CONTINUEC             : ILOpCode<24, "continuec">;
 def IL_OP_COS                   : ILOpCode<25, "cos">;
 def IL_OP_COS_VEC               : ILOpCode<26, "cos_vec">;
 def IL_OP_D_2_F                 : ILOpCode<27, "d2f">;
 def IL_OP_D_ADD                 : ILOpCode<28, "dadd">;
 def IL_OP_D_EQ                  : ILOpCode<29, "deq">;
 def IL_OP_D_FRC                 : ILOpCode<30, "dfrac">;
 def IL_OP_D_FREXP               : ILOpCode<31, "dfrexp">;
 def IL_OP_D_GE                  : ILOpCode<32, "dge">;
 def IL_OP_D_LDEXP               : ILOpCode<33, "dldexp">;
 def IL_OP_D_LT                  : ILOpCode<34, "dlt">;
 def IL_OP_D_MAD                 : ILOpCode<35, "dmad">;
 def IL_OP_D_MUL                 : ILOpCode<36, "dmul">;
 def IL_OP_D_NE                  : ILOpCode<37, "dne">;
 def IL_OP_DEFAULT               : ILOpCode<38, "default">;
 def IL_OP_DISCARD_LOGICALNZ     : ILOpCode<39, "discard_logicalnz">;
 def IL_OP_DISCARD_LOGICALZ      : ILOpCode<40, "discard_logicalz">;
 def IL_OP_DIV                   : ILOpCode<41, "div_zeroop(infinity)">;
 def IL_OP_DP2                   : ILOpCode<42, "dp2">;
 def IL_OP_DP3                   : ILOpCode<43, "dp3">;
 def IL_OP_DP4                   : ILOpCode<44, "dp4">;
 def IL_OP_ELSE                  : ILOpCode<45, "else">;
 def IL_OP_END                   : ILOpCode<46, "end">;
 def IL_OP_ENDFUNC               : ILOpCode<47, "endfunc">;
 def IL_OP_ENDIF                 : ILOpCode<48, "endif">;
 def IL_OP_ENDLOOP               : ILOpCode<49, "endloop">;
 def IL_OP_ENDMAIN               : ILOpCode<50, "endmain">;
 def IL_OP_ENDSWITCH             : ILOpCode<51, "endswitch">;
 def IL_OP_EQ                    : ILOpCode<52, "eq">;
 def IL_OP_EXP                   : ILOpCode<53, "exp">;
 def IL_OP_EXP_VEC               : ILOpCode<54, "exp_vec">;
 def IL_OP_F_2_D                 : ILOpCode<55, "f2d">;
 def IL_OP_FLR                   : ILOpCode<56, "flr">;
 def IL_OP_FRC                   : ILOpCode<57, "frc">;
 def IL_OP_FTOI                  : ILOpCode<58, "ftoi">;
 def IL_OP_FTOU                  : ILOpCode<59, "ftou">;
 def IL_OP_FUNC                  : ILOpCode<60, "func">;
 def IL_OP_GE                    : ILOpCode<61, "ge">;
 def IL_OP_I_ADD                 : ILOpCode<62, "iadd">;
 def IL_OP_I_EQ                  : ILOpCode<63, "ieq">;
 def IL_OP_I_GE                  : ILOpCode<64, "ige">;
 def IL_OP_I_LT                  : ILOpCode<65, "ilt">;
 def IL_OP_I_MAD                 : ILOpCode<66, "imad">;
 def IL_OP_I_MAX                 : ILOpCode<67, "imax">;
 def IL_OP_I_MIN                 : ILOpCode<68, "imin">;
 def IL_OP_I_MUL                 : ILOpCode<69, "imul">;
 def IL_OP_I_MUL_HIGH            : ILOpCode<70, "imul_high">;
 def IL_OP_I_NE                  : ILOpCode<71, "ine">;
 def IL_OP_I_NEGATE              : ILOpCode<72, "inegate">;
 def IL_OP_I_NOT                 : ILOpCode<73, "inot">;
 def IL_OP_I_OR                  : ILOpCode<74, "ior">;
 def IL_OP_I_SHL                 : ILOpCode<75, "ishl">;
 def IL_OP_I_SHR                 : ILOpCode<76, "ishr">;
 def IL_OP_I_XOR                 : ILOpCode<77, "ixor">;
 def IL_OP_IF_LOGICALNZ          : ILOpCode<78, "if_logicalnz">;
 def IL_OP_IF_LOGICALZ           : ILOpCode<79, "if_logicalz">;
 def IL_OP_IFC                   : ILOpCode<80, "ifc">;
 def IL_OP_ITOF                  : ILOpCode<81, "itof">;
 def IL_OP_LN                    : ILOpCode<82, "ln">;
 def IL_OP_LOG                   : ILOpCode<83, "log">;
 def IL_OP_LOG_VEC               : ILOpCode<84, "log_vec">;
 def IL_OP_LOOP                  : ILOpCode<85, "loop">;
 def IL_OP_LT                    : ILOpCode<86, "lt">;
 def IL_OP_MAD                   : ILOpCode<87, "mad_ieee">;
 def IL_OP_MAX                   : ILOpCode<88, "max_ieee">;
 def IL_OP_MIN                   : ILOpCode<89, "min_ieee">;
 def IL_OP_MOD                   : ILOpCode<90, "mod_ieee">;
 def IL_OP_MOV                   : ILOpCode<91, "mov">;
 def IL_OP_MUL_IEEE              : ILOpCode<92, "mul_ieee">;
 def IL_OP_NE                    : ILOpCode<93, "ne">;
 def IL_OP_NRM                   : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
 def IL_OP_POW                   : ILOpCode<95, "pow">;
 def IL_OP_RCP                   : ILOpCode<96, "rcp">;
 def IL_OP_RET                   : ILOpCode<97, "ret">;
 def IL_OP_RET_DYN               : ILOpCode<98, "ret_dyn">;
 def IL_OP_RET_LOGICALNZ         : ILOpCode<99, "ret_logicalnz">;
 def IL_OP_RET_LOGICALZ          : ILOpCode<100, "ret_logicalz">;
 def IL_OP_RND                   : ILOpCode<101, "rnd">;
 def IL_OP_ROUND_NEAR            : ILOpCode<102, "round_nearest">;
 def IL_OP_ROUND_NEG_INF         : ILOpCode<103, "round_neginf">;
 def IL_OP_ROUND_POS_INF         : ILOpCode<104, "round_plusinf">;
 def IL_OP_ROUND_ZERO            : ILOpCode<105, "round_z">;
 def IL_OP_RSQ                   : ILOpCode<106, "rsq">;
 def IL_OP_RSQ_VEC               : ILOpCode<107, "rsq_vec">;
 def IL_OP_SAMPLE                : ILOpCode<108, "sample">;
 def IL_OP_SAMPLE_L              : ILOpCode<109, "sample_l">;
 def IL_OP_SET                   : ILOpCode<110, "set">;
 def IL_OP_SGN                   : ILOpCode<111, "sgn">;
 def IL_OP_SIN                   : ILOpCode<112, "sin">;
 def IL_OP_SIN_VEC               : ILOpCode<113, "sin_vec">;
 def IL_OP_SUB                   : ILOpCode<114, "sub">;
 def IL_OP_SWITCH                : ILOpCode<115, "switch">;
 def IL_OP_TRC                   : ILOpCode<116, "trc">;
 def IL_OP_U_DIV                 : ILOpCode<117, "udiv">;
 def IL_OP_U_GE                  : ILOpCode<118, "uge">;
 def IL_OP_U_LT                  : ILOpCode<119, "ult">;
 def IL_OP_U_MAD                 : ILOpCode<120, "umad">;
 def IL_OP_U_MAX                 : ILOpCode<121, "umax">;
 def IL_OP_U_MIN                 : ILOpCode<122, "umin">;
 def IL_OP_U_MOD                 : ILOpCode<123, "umod">;
 def IL_OP_U_MUL                 : ILOpCode<124, "umul">;
 def IL_OP_U_MUL_HIGH            : ILOpCode<125, "umul_high">;
 def IL_OP_U_SHR                 : ILOpCode<126, "ushr">;
 def IL_OP_UTOF                  : ILOpCode<127, "utof">;
 def IL_OP_WHILE                 : ILOpCode<128, "whileloop">;
 // SC IL instructions that are not in CAL IL
 def IL_OP_ACOS                  : ILOpCode<129, "acos">;
 def IL_OP_ASIN                  : ILOpCode<130, "asin">;
 def IL_OP_EXN                   : ILOpCode<131, "exn">;
 def IL_OP_UBIT_REVERSE          : ILOpCode<132, "ubit_reverse">;
 def IL_OP_UBIT_EXTRACT          : ILOpCode<133, "ubit_extract">;
 def IL_OP_IBIT_EXTRACT          : ILOpCode<134, "ibit_extract">;
 def IL_OP_SQRT                  : ILOpCode<135, "sqrt">;
 def IL_OP_SQRT_VEC              : ILOpCode<136, "sqrt_vec">;
 // NOTE(review): IL_OP_ATAN and IL_OP_TAN both use Value 137. This looks like
 // a numbering slip, but renumbering would shift every later opcode --
 // confirm whether Value is consumed anywhere before changing it.
 def IL_OP_ATAN                  : ILOpCode<137, "atan">;
 def IL_OP_TAN                   : ILOpCode<137, "tan">;
 def IL_OP_D_DIV                 : ILOpCode<138, "ddiv">;
 // IL_OP_F_NEG shares the "mov" mnemonic with IL_OP_MOV.
 def IL_OP_F_NEG                 : ILOpCode<139, "mov">;
 def IL_OP_GT                    : ILOpCode<140, "gt">;
 // NOTE(review): IL_OP_LE emits "lt", the same mnemonic as IL_OP_LT;
 // presumably either an operand-swapping convention or a typo for "le" --
 // confirm against the users of this opcode.
 def IL_OP_LE                    : ILOpCode<141, "lt">;
 def IL_OP_DIST                  : ILOpCode<142, "dist">;
 def IL_OP_LEN                   : ILOpCode<143, "len">;
 def IL_OP_MACRO                 : ILOpCode<144, "mcall">;
 def IL_OP_INTR                  : ILOpCode<145, "call">;
 def IL_OP_I_FFB_HI              : ILOpCode<146, "ffb_hi">;
 def IL_OP_I_FFB_LO              : ILOpCode<147, "ffb_lo">;
 def IL_OP_BARRIER               : ILOpCode<148, "fence_threads_memory_lds">;
 def IL_OP_BARRIER_LOCAL         : ILOpCode<149, "fence_threads_lds">;
 def IL_OP_BARRIER_GLOBAL        : ILOpCode<150, "fence_threads_memory">;
 def IL_OP_FENCE                 : ILOpCode<151, "fence_lds_memory">;
 def IL_OP_FENCE_READ_ONLY       : ILOpCode<152, "fence_lds_mem_read_only">;
 def IL_OP_FENCE_WRITE_ONLY      : ILOpCode<153, "fence_lds_mem_write_only">;
 def IL_PSEUDO_INST              : ILOpCode<154, ";Pseudo Op">;
 def IL_OP_UNPACK_0              : ILOpCode<155, "unpack0">;
 def IL_OP_UNPACK_1              : ILOpCode<156, "unpack1">;
 def IL_OP_UNPACK_2              : ILOpCode<157, "unpack2">;
 def IL_OP_UNPACK_3              : ILOpCode<158, "unpack3">;
 def IL_OP_PI_REDUCE             : ILOpCode<159, "pireduce">;
 def IL_OP_IBIT_COUNT            : ILOpCode<160, "icbits">;
 def IL_OP_I_FFB_SGN             : ILOpCode<161, "ffb_shi">;
 def IL_OP_F2U4                  : ILOpCode<162, "f_2_u4">;
 def IL_OP_BIT_ALIGN             : ILOpCode<163, "bitalign">;
 def IL_OP_BYTE_ALIGN            : ILOpCode<164, "bytealign">;
 def IL_OP_U4_LERP               : ILOpCode<165, "u4lerp">;
 def IL_OP_SAD                   : ILOpCode<166, "sad">;
 def IL_OP_SAD_HI                : ILOpCode<167, "sadhi">;
 def IL_OP_SAD4                  : ILOpCode<168, "sad4">;
 def IL_OP_UBIT_INSERT           : ILOpCode<169, "ubit_insert">;
 def IL_OP_I_CARRY               : ILOpCode<170, "icarry">;
 def IL_OP_I_BORROW              : ILOpCode<171, "iborrow">;
 def IL_OP_U_MAD24               : ILOpCode<172, "umad24">;
 def IL_OP_U_MUL24               : ILOpCode<173, "umul24">;
 def IL_OP_I_MAD24               : ILOpCode<174, "imad24">;
 def IL_OP_I_MUL24               : ILOpCode<175, "imul24">;
 def IL_OP_CLAMP                 : ILOpCode<176, "clamp">;
 def IL_OP_LERP                  : ILOpCode<177, "lrp">;
 def IL_OP_FMA                   : ILOpCode<178, "fma">;
 def IL_OP_D_MIN                 : ILOpCode<179, "dmin">;
 def IL_OP_D_MAX                 : ILOpCode<180, "dmax">;
 def IL_OP_D_SQRT                : ILOpCode<181, "dsqrt">;
 def IL_OP_DP2_ADD               : ILOpCode<182, "dp2add">;
 def IL_OP_F16_TO_F32            : ILOpCode<183, "f162f">;
 def IL_OP_F32_TO_F16            : ILOpCode<184, "f2f16">;
 // Thread-ID input registers. Each ID comes in a plain form and a "Flat"
 // form; the Text field holds the IL register name.
 def IL_REG_LOCAL_ID_FLAT        : ILOpCode<185, "vTidInGrpFlat">;
 def IL_REG_LOCAL_ID             : ILOpCode<186, "vTidInGrp">;
 // Fixed: the register name was misspelled "vAbsTidFlag"; every other flat
 // register here ends in "Flat".
 def IL_REG_GLOBAL_ID_FLAT       : ILOpCode<187, "vAbsTidFlat">;
 def IL_REG_GLOBAL_ID            : ILOpCode<188, "vAbsTid">;
 def IL_REG_GROUP_ID_FLAT        : ILOpCode<189, "vThreadGrpIDFlat">;
 def IL_REG_GROUP_ID             : ILOpCode<190, "vThreadGrpID">;
 def IL_OP_D_RCP                 : ILOpCode<191, "drcp_zeroop(infinity)">;
 def IL_OP_D_RSQ                 : ILOpCode<192, "drsq_zeroop(infinity)">;
 def IL_OP_D_MOV                 : ILOpCode<193, "dmov">;
 def IL_OP_D_MOVC                : ILOpCode<194, "dmovc">;
 def IL_OP_NOP                   : ILOpCode<195, "nop">;
 def IL_OP_UAV_ADD               : ILOpCode<196, "uav_add">;
 def IL_OP_UAV_AND               : ILOpCode<197, "uav_and">;
 def IL_OP_UAV_MAX               : ILOpCode<198, "uav_max">;
 def IL_OP_UAV_MIN               : ILOpCode<199, "uav_min">;
 def IL_OP_UAV_OR                : ILOpCode<200, "uav_or">;
 def IL_OP_UAV_RSUB              : ILOpCode<201, "uav_rsub">;
 def IL_OP_UAV_SUB               : ILOpCode<202, "uav_sub">;
 def IL_OP_UAV_UMAX              : ILOpCode<203, "uav_umax">;
 def IL_OP_UAV_UMIN              : ILOpCode<204, "uav_umin">;
 def IL_OP_UAV_XOR               : ILOpCode<205, "uav_xor">;
 def IL_OP_UAV_INC               : ILOpCode<206, "uav_uinc">;
 def IL_OP_UAV_DEC               : ILOpCode<207, "uav_udec">;
 def IL_OP_UAV_CMP               : ILOpCode<208, "uav_cmp">;
 def IL_OP_UAV_READ_ADD          : ILOpCode<209, "uav_read_add">;
 def IL_OP_UAV_READ_AND          : ILOpCode<210, "uav_read_and">;
 def IL_OP_UAV_READ_MAX          : ILOpCode<211, "uav_read_max">;
 def IL_OP_UAV_READ_MIN          : ILOpCode<212, "uav_read_min">;
 def IL_OP_UAV_READ_OR           : ILOpCode<213, "uav_read_or">;
 def IL_OP_UAV_READ_RSUB         : ILOpCode<214, "uav_read_rsub">;
 def IL_OP_UAV_READ_SUB          : ILOpCode<215, "uav_read_sub">;
 def IL_OP_UAV_READ_UMAX         : ILOpCode<216, "uav_read_umax">;
 def IL_OP_UAV_READ_UMIN         : ILOpCode<217, "uav_read_umin">;
 def IL_OP_UAV_READ_XOR          : ILOpCode<218, "uav_read_xor">;
 def IL_OP_UAV_READ_INC          : ILOpCode<219, "uav_read_uinc">;
 def IL_OP_UAV_READ_DEC          : ILOpCode<220, "uav_read_udec">;
 def IL_OP_UAV_READ_XCHG         : ILOpCode<221, "uav_read_xchg">;
 def IL_OP_UAV_READ_CMPXCHG      : ILOpCode<222, "uav_read_cmp_xchg">;
 def IL_OP_LDS_ADD               : ILOpCode<223, "lds_add">;
 def IL_OP_LDS_AND               : ILOpCode<224, "lds_and">;
 def IL_OP_LDS_MAX               : ILOpCode<225, "lds_max">;
 def IL_OP_LDS_MIN               : ILOpCode<226, "lds_min">;
 def IL_OP_LDS_OR                : ILOpCode<227, "lds_or">;
 def IL_OP_LDS_RSUB              : ILOpCode<228, "lds_rsub">;
 def IL_OP_LDS_SUB               : ILOpCode<229, "lds_sub">;
 def IL_OP_LDS_UMAX              : ILOpCode<230, "lds_umax">;
 def IL_OP_LDS_UMIN              : ILOpCode<231, "lds_umin">;
 def IL_OP_LDS_XOR               : ILOpCode<232, "lds_xor">;
 def IL_OP_LDS_INC               : ILOpCode<233, "lds_inc">;
 def IL_OP_LDS_DEC               : ILOpCode<234, "lds_dec">;
 def IL_OP_LDS_CMP               : ILOpCode<235, "lds_cmp">;
 def IL_OP_LDS_READ_ADD          : ILOpCode<236, "lds_read_add">;
 def IL_OP_LDS_READ_AND          : ILOpCode<237, "lds_read_and">;
 def IL_OP_LDS_READ_MAX          : ILOpCode<238, "lds_read_max">;
 def IL_OP_LDS_READ_MIN          : ILOpCode<239, "lds_read_min">;
 def IL_OP_LDS_READ_OR           : ILOpCode<240, "lds_read_or">;
 def IL_OP_LDS_READ_RSUB         : ILOpCode<241, "lds_read_rsub">;
 def IL_OP_LDS_READ_SUB          : ILOpCode<242, "lds_read_sub">;
 def IL_OP_LDS_READ_UMAX         : ILOpCode<243, "lds_read_umax">;
 def IL_OP_LDS_READ_UMIN         : ILOpCode<244, "lds_read_umin">;
 def IL_OP_LDS_READ_XOR          : ILOpCode<245, "lds_read_xor">;
 def IL_OP_LDS_READ_INC          : ILOpCode<246, "lds_read_inc">;
 def IL_OP_LDS_READ_DEC          : ILOpCode<247, "lds_read_dec">;
 def IL_OP_LDS_READ_XCHG         : ILOpCode<248, "lds_read_xchg">;
 def IL_OP_LDS_READ_CMPXCHG      : ILOpCode<249, "lds_read_cmp_xchg">;
 def IL_OP_GDS_ADD               : ILOpCode<250, "gds_add">;
 def IL_OP_GDS_AND               : ILOpCode<251, "gds_and">;
 def IL_OP_GDS_MAX               : ILOpCode<252, "gds_max">;
 def IL_OP_GDS_MIN               : ILOpCode<253, "gds_min">;
 def IL_OP_GDS_OR                : ILOpCode<254, "gds_or">;
 def IL_OP_GDS_RSUB              : ILOpCode<255, "gds_rsub">;
 def IL_OP_GDS_SUB               : ILOpCode<256, "gds_sub">;
 def IL_OP_GDS_UMAX              : ILOpCode<257, "gds_umax">;
 def IL_OP_GDS_UMIN              : ILOpCode<258, "gds_umin">;
 def IL_OP_GDS_MSKOR             : ILOpCode<259, "gds_mskor">;
 def IL_OP_GDS_XOR               : ILOpCode<260, "gds_xor">;
 def IL_OP_GDS_INC               : ILOpCode<261, "gds_inc">;
 def IL_OP_GDS_DEC               : ILOpCode<262, "gds_dec">;
 def IL_OP_GDS_CMP               : ILOpCode<263, "gds_cmp">;
 def IL_OP_GDS_READ_ADD          : ILOpCode<264, "gds_read_add">;
 def IL_OP_GDS_READ_AND          : ILOpCode<265, "gds_read_and">;
 def IL_OP_GDS_READ_MAX          : ILOpCode<266, "gds_read_max">;
 def IL_OP_GDS_READ_MIN          : ILOpCode<267, "gds_read_min">;
 def IL_OP_GDS_READ_OR           : ILOpCode<268, "gds_read_or">;
 def IL_OP_GDS_READ_RSUB         : ILOpCode<269, "gds_read_rsub">;
 def IL_OP_GDS_READ_SUB          : ILOpCode<270, "gds_read_sub">;
 def IL_OP_GDS_READ_UMAX         : ILOpCode<271, "gds_read_umax">;
 def IL_OP_GDS_READ_UMIN         : ILOpCode<272, "gds_read_umin">;
 def IL_OP_GDS_READ_MSKOR        : ILOpCode<273, "gds_read_mskor">;
 def IL_OP_GDS_READ_XOR          : ILOpCode<274, "gds_read_xor">;
 def IL_OP_GDS_READ_INC          : ILOpCode<275, "gds_read_inc">;
 def IL_OP_GDS_READ_DEC          : ILOpCode<276, "gds_read_dec">;
 def IL_OP_GDS_READ_XCHG         : ILOpCode<277, "gds_read_xchg">;
 def IL_OP_GDS_READ_CMPXCHG      : ILOpCode<278, "gds_read_cmp_xchg">;
 def IL_OP_APPEND_BUF_ALLOC      : ILOpCode<279, "append_buf_alloc">;
 def IL_OP_APPEND_BUF_CONSUME    : ILOpCode<280, "append_buf_consume">;
 def IL_OP_I64_ADD               : ILOpCode<281, "i64add">;
 def IL_OP_I64_MAX               : ILOpCode<282, "i64max">;
 def IL_OP_U64_MAX               : ILOpCode<283, "u64max">;
 def IL_OP_I64_MIN               : ILOpCode<284, "i64min">;
 def IL_OP_U64_MIN               : ILOpCode<285, "u64min">;
 def IL_OP_I64_NEGATE            : ILOpCode<286, "i64negate">;
 def IL_OP_I64_SHL               : ILOpCode<287, "i64shl">;
 def IL_OP_I64_SHR               : ILOpCode<288, "i64shr">;
 def IL_OP_U64_SHR               : ILOpCode<289, "u64shr">;
 def IL_OP_I64_EQ                : ILOpCode<290, "i64eq">;
 def IL_OP_I64_GE                : ILOpCode<291, "i64ge">;
 def IL_OP_U64_GE                : ILOpCode<292, "u64ge">;
 def IL_OP_I64_LT                : ILOpCode<293, "i64lt">;
 def IL_OP_U64_LT                : ILOpCode<294, "u64lt">;
 def IL_OP_I64_NE                : ILOpCode<295, "i64ne">;
 def IL_OP_U_MULHI24             : ILOpCode<296, "umul24_high">;
 def IL_OP_I_MULHI24             : ILOpCode<297, "imul24_high">;
 def IL_OP_GDS_LOAD              : ILOpCode<298, "gds_load">;
 def IL_OP_GDS_STORE             : ILOpCode<299, "gds_store">;
 def IL_OP_LDS_LOAD              : ILOpCode<300, "lds_load">;
 def IL_OP_LDS_LOAD_VEC          : ILOpCode<301, "lds_load_vec">;
 def IL_OP_LDS_LOAD_BYTE         : ILOpCode<302, "lds_load_byte">;
 def IL_OP_LDS_LOAD_UBYTE        : ILOpCode<303, "lds_load_ubyte">;
 def IL_OP_LDS_LOAD_SHORT        : ILOpCode<304, "lds_load_short">;
 def IL_OP_LDS_LOAD_USHORT       : ILOpCode<305, "lds_load_ushort">;
 def IL_OP_LDS_STORE             : ILOpCode<306, "lds_store">;
 def IL_OP_LDS_STORE_VEC         : ILOpCode<307, "lds_store_vec">;
 def IL_OP_LDS_STORE_BYTE        : ILOpCode<308, "lds_store_byte">;
 def IL_OP_LDS_STORE_SHORT       : ILOpCode<309, "lds_store_short">;
 def IL_OP_RAW_UAV_LOAD          : ILOpCode<310, "uav_raw_load">;
 def IL_OP_RAW_UAV_STORE         : ILOpCode<311, "uav_raw_store">;
 def IL_OP_ARENA_UAV_LOAD        : ILOpCode<312, "uav_arena_load">;
 def IL_OP_ARENA_UAV_STORE       : ILOpCode<313, "uav_arena_store">;
 def IL_OP_LDS_MSKOR             : ILOpCode<314, "lds_mskor">;
 def IL_OP_LDS_READ_MSKOR        : ILOpCode<315, "lds_read_mskor">;
 def IL_OP_UAV_BYTE_LOAD         : ILOpCode<316, "uav_byte_load">;
 def IL_OP_UAV_UBYTE_LOAD        : ILOpCode<317, "uav_ubyte_load">;
 def IL_OP_UAV_SHORT_LOAD        : ILOpCode<318, "uav_short_load">;
 def IL_OP_UAV_USHORT_LOAD       : ILOpCode<319, "uav_ushort_load">;
 // NOTE(review): IL_OP_UAV_BYTE_STORE and IL_OP_UAV_SHORT_STORE both use
 // Value 320. This looks like a numbering slip, but renumbering would shift
 // every later opcode -- confirm whether Value is consumed anywhere first.
 def IL_OP_UAV_BYTE_STORE        : ILOpCode<320, "uav_byte_store">;
 def IL_OP_UAV_SHORT_STORE       : ILOpCode<320, "uav_short_store">;
 def IL_OP_UAV_STORE             : ILOpCode<321, "uav_store">;
 def IL_OP_UAV_LOAD              : ILOpCode<322, "uav_load">;
 def IL_OP_MUL                   : ILOpCode<323, "mul">;
 def IL_OP_DIV_INF               : ILOpCode<324, "div_zeroop(infinity)">;
 def IL_OP_DIV_FLTMAX            : ILOpCode<325, "div_zeroop(fltmax)">;
 def IL_OP_DIV_ZERO              : ILOpCode<326, "div_zeroop(zero)">;
 def IL_OP_DIV_INFELSEMAX        : ILOpCode<327, "div_zeroop(inf_else_max)">;
 def IL_OP_FTOI_FLR              : ILOpCode<328, "ftoi_flr">;
 def IL_OP_FTOI_RPI              : ILOpCode<329, "ftoi_rpi">;
 def IL_OP_F32_TO_F16_NEAR       : ILOpCode<330, "f2f16_near">;
 def IL_OP_F32_TO_F16_NEG_INF    : ILOpCode<331, "f2f16_neg_inf">;
 def IL_OP_F32_TO_F16_PLUS_INF   : ILOpCode<332, "f2f16_plus_inf">;
 def IL_OP_I64_MUL               : ILOpCode<333, "i64mul">;
 def IL_OP_U64_MUL               : ILOpCode<334, "u64mul">;
 def IL_OP_CU_ID                 : ILOpCode<355, "cu_id">;
 def IL_OP_WAVE_ID               : ILOpCode<356, "wave_id">;
 def IL_OP_I64_SUB               : ILOpCode<357, "i64sub">;
 def IL_OP_I64_DIV               : ILOpCode<358, "i64div">;
 def IL_OP_U64_DIV               : ILOpCode<359, "u64div">;
 def IL_OP_I64_MOD               : ILOpCode<360, "i64mod">;
 def IL_OP_U64_MOD               : ILOpCode<361, "u64mod">;
 def IL_DCL_GWS_THREAD_COUNT     : ILOpCode<362, "dcl_gws_thread_count">;
 def IL_DCL_SEMAPHORE            : ILOpCode<363, "dcl_semaphore">;
 def IL_OP_SEMAPHORE_INIT        : ILOpCode<364, "init_semaphore">;
 def IL_OP_SEMAPHORE_WAIT        : ILOpCode<365, "semaphore_wait">;
 def IL_OP_SEMAPHORE_SIGNAL      : ILOpCode<366, "semaphore_signal">;
 def IL_OP_BARRIER_REGION        : ILOpCode<377, "fence_threads_gds">;
 def IL_OP_BFI                   : ILOpCode<394, "bfi">;
 def IL_OP_BFM                   : ILOpCode<395, "bfm">;
 def IL_DBG_STRING               : ILOpCode<396, "dbg_string">;
 def IL_DBG_LINE                 : ILOpCode<397, "dbg_line">;
 def IL_DBG_TEMPLOC              : ILOpCode<398, "dbg_temploc">;
--- a/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
+++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
@ -0,0 +1,183 @@
 //===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDILEvergreenDevice.h"
 using namespace llvm;
 // Builds an Evergreen device for subtarget ST: applies the Evergreen
 // capability bits via setCaps(), then derives mDeviceFlag from the
 // subtarget's device name. Any name other than "cedar", "redwood" or
 // "cypress" is treated as Juniper.
 AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
   : AMDILDevice(ST) {
  setCaps();
  const std::string devName = ST->getDeviceName();
  if (devName == "cedar")
    mDeviceFlag = OCL_DEVICE_CEDAR;
  else if (devName == "redwood")
    mDeviceFlag = OCL_DEVICE_REDWOOD;
  else if (devName == "cypress")
    mDeviceFlag = OCL_DEVICE_CYPRESS;
  else
    mDeviceFlag = OCL_DEVICE_JUNIPER;
 }
 // The destructor body is empty; this class releases nothing itself.
 AMDILEvergreenDevice::~AMDILEvergreenDevice() {
 }
 // Returns MAX_LDS_SIZE_800 when usesHardware() reports LocalMem as a
 // hardware capability, and 0 otherwise.
 size_t AMDILEvergreenDevice::getMaxLDSSize() const {
  if (!usesHardware(AMDILDeviceInfo::LocalMem))
    return 0;
  return MAX_LDS_SIZE_800;
 }
 // Returns MAX_LDS_SIZE_800 when usesHardware() reports RegionMem as a
 // hardware capability, and 0 otherwise.
 // NOTE(review): the GDS limit is expressed with the LDS constant;
 // presumably the two sizes coincide on this generation -- confirm.
 size_t AMDILEvergreenDevice::getMaxGDSSize() const {
  if (!usesHardware(AMDILDeviceInfo::RegionMem))
    return 0;
  return MAX_LDS_SIZE_800;
 }
 // Returns the maximum number of UAVs for this device; fixed at 12 here.
 uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
  return 12;
 }
 // Maps an ID kind to the resource ID used for it on this device.
 //  - CONSTANT_ID is handled exactly like RAW_UAV_ID, and GLOBAL_ID exactly
 //    like ARENA_UAV_ID.
 //  - LDS_ID, GDS_ID and SCRATCH_ID return their own default ID only when the
 //    matching memory is a hardware capability; otherwise they fall back to
 //    DEFAULT_ARENA_UAV_ID.
 //  - Any other value trips the assert; if the assert is compiled out the
 //    function returns 0.
 uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
  switch (id) {
  case CONSTANT_ID:
  case RAW_UAV_ID:
    // The raw UAV ID depends on the CAL version reported by the subtarget.
    if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER)
      return GLOBAL_RETURN_RAW_UAV_ID;
    return DEFAULT_RAW_UAV_ID;

  case GLOBAL_ID:
  case ARENA_UAV_ID:
    return DEFAULT_ARENA_UAV_ID;

  case LDS_ID:
    if (usesHardware(AMDILDeviceInfo::LocalMem))
      return DEFAULT_LDS_ID;
    return DEFAULT_ARENA_UAV_ID;

  case GDS_ID:
    if (usesHardware(AMDILDeviceInfo::RegionMem))
      return DEFAULT_GDS_ID;
    return DEFAULT_ARENA_UAV_ID;

  case SCRATCH_ID:
    if (usesHardware(AMDILDeviceInfo::PrivateMem))
      return DEFAULT_SCRATCH_ID;
    return DEFAULT_ARENA_UAV_ID;

  default:
    assert(0 && "ID type passed in is unknown!");
    break;
  }
  return 0;
 }
 // Returns AMDILDevice::WavefrontSize, the value used by the base Evergreen
 // device.
 size_t AMDILEvergreenDevice::getWavefrontSize() const {
  return AMDILDevice::WavefrontSize;
 }
 // Reports this device's generation as AMDILDeviceInfo::HD5XXX.
 uint32_t AMDILEvergreenDevice::getGeneration() const {
  return AMDILDeviceInfo::HD5XXX;
 }
 // Fills in the capability bits for the base Evergreen device. Each
 // capability is recorded in mHWBits or mSWBits; which one is chosen depends
 // on the CAL version reported by the subtarget and on the subtarget's
 // override flags.
 void AMDILEvergreenDevice::setCaps() {
  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
  mHWBits.set(AMDILDeviceInfo::ArenaUAV);

  // CAL versions at or above SC_140: record HW64BitDivMod in the HW set and
  // clear it from the SW set.
  if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
    mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
    mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
  }

  mSWBits.set(AMDILDeviceInfo::Signed24BitOps);

  if (mSTM->isOverride(AMDILDeviceInfo::ByteStores))
    mHWBits.set(AMDILDeviceInfo::ByteStores);

  // Local and region memory go to the HW set unless the Debug override is
  // active, in which case they go to the SW set instead.
  if (!mSTM->isOverride(AMDILDeviceInfo::Debug)) {
    mHWBits.set(AMDILDeviceInfo::LocalMem);
    mHWBits.set(AMDILDeviceInfo::RegionMem);
  } else {
    mSWBits.set(AMDILDeviceInfo::LocalMem);
    mSWBits.set(AMDILDeviceInfo::RegionMem);
  }

  mHWBits.set(AMDILDeviceInfo::Images);

  if (mSTM->isOverride(AMDILDeviceInfo::NoAlias))
    mHWBits.set(AMDILDeviceInfo::NoAlias);

  if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER)
    mHWBits.set(AMDILDeviceInfo::CachedMem);

  if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV))
    mHWBits.set(AMDILDeviceInfo::MultiUAV);

  // CAL versions above SC_136: ByteLDSOps and ArenaVectors go to the HW set;
  // otherwise ArenaVectors goes to the SW set.
  if (mSTM->calVersion() > CAL_VERSION_SC_136) {
    mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
    mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
    mHWBits.set(AMDILDeviceInfo::ArenaVectors);
  } else {
    mSWBits.set(AMDILDeviceInfo::ArenaVectors);
  }

  // CAL versions above SC_137: LongOps moves to the HW set.
  if (mSTM->calVersion() > CAL_VERSION_SC_137) {
    mHWBits.set(AMDILDeviceInfo::LongOps);
    mSWBits.reset(AMDILDeviceInfo::LongOps);
  }

  mHWBits.set(AMDILDeviceInfo::TmrReg);
 }
 // Builds a Cypress device on top of the Evergreen base. The base
 // constructor has already applied the Evergreen capability bits; the
 // setCaps() call here applies the Cypress-specific ones on top.
 AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
 }
 // The destructor body is empty; this class releases nothing itself.
 AMDILCypressDevice::~AMDILCypressDevice() {
 }
 // Cypress-specific capability bits: when the subtarget has the DoubleOps
 // override, DoubleOps and FMA are both recorded in the HW set. Without the
 // override nothing is changed.
 void AMDILCypressDevice::setCaps() {
  if (!mSTM->isOverride(AMDILDeviceInfo::DoubleOps))
    return;
  mHWBits.set(AMDILDeviceInfo::DoubleOps);
  mHWBits.set(AMDILDeviceInfo::FMA);
 }
 // Builds a Cedar device on top of the Evergreen base. The base constructor
 // has already applied the Evergreen capability bits; the setCaps() call
 // here applies the Cedar-specific ones on top.
 AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
 }
 // The destructor body is empty; this class releases nothing itself.
 AMDILCedarDevice::~AMDILCedarDevice() {
 }
 // Cedar-specific capability bits: FMA is recorded in the SW set.
 void AMDILCedarDevice::setCaps() {
  mSWBits.set(AMDILDeviceInfo::FMA);
 }
 // Cedar reports AMDILDevice::QuarterWavefrontSize instead of the Evergreen
 // base value.
 size_t AMDILCedarDevice::getWavefrontSize() const {
  return AMDILDevice::QuarterWavefrontSize;
 }
 // Builds a Redwood device on top of the Evergreen base. The base
 // constructor has already applied the Evergreen capability bits; the
 // setCaps() call here applies the Redwood-specific ones on top.
 AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
 }
 // The destructor body is empty; this class releases nothing itself.
 AMDILRedwoodDevice::~AMDILRedwoodDevice()
 {
 }
 // Redwood-specific capability bits: FMA is recorded in the SW set.
 void AMDILRedwoodDevice::setCaps() {
  mSWBits.set(AMDILDeviceInfo::FMA);
 }
 // Redwood reports AMDILDevice::HalfWavefrontSize instead of the Evergreen
 // base value.
 size_t AMDILRedwoodDevice::getWavefrontSize() const {
  return AMDILDevice::HalfWavefrontSize;
 }
--- a/lib/Target/AMDGPU/AMDILEvergreenDevice.h
+++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.h
@ -0,0 +1,87 @@
 //==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface for the subtarget data classes.
 //
 //===----------------------------------------------------------------------===//
 // This file will define the interface that each generation needs to
 // implement in order to correctly answer queries on the capabilities of the
 // specific hardware.
 //===----------------------------------------------------------------------===//
 #ifndef _AMDILEVERGREENDEVICE_H_
 #define _AMDILEVERGREENDEVICE_H_
 #include "AMDILDevice.h"
 #include "AMDILSubtarget.h"
 namespace llvm {
  class AMDILSubtarget;
 //===----------------------------------------------------------------------===//
 // Evergreen generation of devices and their respective sub classes
 //===----------------------------------------------------------------------===//
 // The AMDILEvergreenDevice is the base device class for all of the Evergreen
 // series of cards. This class contains information required to differentiate
 // the Evergreen device from the generic AMDILDevice. This device represents
 // the capabilities of the 'Juniper' cards, also known as the HD57XX.
 class AMDILEvergreenDevice : public AMDILDevice {
 public:
  AMDILEvergreenDevice(AMDILSubtarget *ST);
  virtual ~AMDILEvergreenDevice();
  // Device queries. getWavefrontSize() is overridden by the Cedar and Redwood
  // subclasses declared later in this header.
  virtual size_t getMaxLDSSize() const;
  virtual size_t getMaxGDSSize() const;
  virtual size_t getWavefrontSize() const;
  virtual uint32_t getGeneration() const;
  virtual uint32_t getMaxNumUAVs() const;
  virtual uint32_t getResourceID(uint32_t) const;
 protected:
  // Populates the device capability bits; each subclass in this header
  // provides its own version.
  virtual void setCaps();
 }; // AMDILEvergreenDevice
 // The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
 // support for double precision operations. This device is used to represent
 // both the Cypress and Hemlock cards, which are commercially known as HD58XX
 // and HD59XX cards.
 class AMDILCypressDevice : public AMDILEvergreenDevice {
 public:
  AMDILCypressDevice(AMDILSubtarget *ST);
  virtual ~AMDILCypressDevice();
 private:
  // Cypress-specific capability setup.
  virtual void setCaps();
 }; // AMDILCypressDevice
 // The AMDILCedarDevice is the class that represents all of the 'Cedar' based
 // devices. This class differs from the base AMDILEvergreenDevice in that the
 // device is a ~quarter of the 'Juniper'. These are commercially known as the
 // HD54XX and HD53XX series of cards.
 class AMDILCedarDevice : public AMDILEvergreenDevice {
 public:
  AMDILCedarDevice(AMDILSubtarget *ST);
  virtual ~AMDILCedarDevice();
  // Overrides the Evergreen wavefront size for this smaller device.
  virtual size_t getWavefrontSize() const;
 private:
  // Cedar-specific capability setup.
  virtual void setCaps();
 }; // AMDILCedarDevice
 // The AMDILRedwoodDevice is the class that represents all of the 'Redwood'
 // based devices. This class differs from the base class, in that these devices
 // are considered about half of a 'Juniper' device. These are commercially
 // known as the HD55XX and HD56XX series of cards.
 class AMDILRedwoodDevice : public AMDILEvergreenDevice {
 public:
  AMDILRedwoodDevice(AMDILSubtarget *ST);
  virtual ~AMDILRedwoodDevice();
  // Overrides the Evergreen wavefront size for this smaller device.
  virtual size_t getWavefrontSize() const;
 private:
  // Redwood-specific capability setup.
  virtual void setCaps();
 }; // AMDILRedwoodDevice
 } // namespace llvm
 #endif // _AMDILEVERGREENDEVICE_H_
--- a/lib/Target/AMDGPU/AMDILFormats.td
+++ b/lib/Target/AMDGPU/AMDILFormats.td
@ -0,0 +1,175 @@
 //==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 //===--------------------------------------------------------------------===//
 include "AMDILTokenDesc.td"
 //===--------------------------------------------------------------------===//
 // The parent IL instruction class that inherits the Instruction class. This
 // class sets the corresponding namespace, the output and input dag lists, the
 // pattern to match, and the string to print out for the assembly printer.
 //===--------------------------------------------------------------------===//
 class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
 : Instruction {
     let Namespace = "AMDGPU";
     dag OutOperandList = outs;
     dag InOperandList = ins;
     // IL opcode this instruction corresponds to.
     ILOpCode operation = op;
     let Pattern = pattern;
     // A newline is appended to every assembly string.
     let AsmString = !strconcat(asmstr, "\n");
     let isPseudo = 1;
     let Itinerary = NullALU;
     // Per-instruction flags; both default to off.
     bit hasIEEEFlag = 0;
     bit hasZeroOpFlag = 0;
 }
 //===--------------------------------------------------------------------===//
 // Class that has one input parameter and one output parameter.
 // The basic pattern for this class is "Opcode Dst, Src0" and
 // handles the unary math operators.
 // It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
 // if the addressing is register relative for input and output register 0.
 //===--------------------------------------------------------------------===//
 class OneInOneOut<ILOpCode op, dag outs, dag ins,
      string asmstr, list<dag> pattern>
      : ILFormat<op, outs, ins, asmstr, pattern>
 {
     // Destination operand tokens.
     ILDst       dst_reg;
     ILDstMod    dst_mod;
     ILRelAddr   dst_rel;
     ILSrc       dst_reg_rel;
     ILSrcMod    dst_reg_rel_mod;
     // Source operand 0 tokens.
     ILSrc       src0_reg;
     ILSrcMod    src0_mod;
     ILRelAddr   src0_rel;
     ILSrc       src0_reg_rel;
     ILSrcMod    src0_reg_rel_mod;
 }
 //===--------------------------------------------------------------------===//
 // This class is similar to the unary OneInOneOut class; however, there is no
 // result value to assign, so only the source operand 0 tokens are declared.
 //===--------------------------------------------------------------------===//
 class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
      string asmstr, list<dag> pattern>
      : ILFormat<op, outs, ins, asmstr, pattern>
 {
     ILSrc       src0_reg;
     ILSrcMod    src0_mod;
     ILRelAddr   src0_rel;
     ILSrc       src0_reg_rel;
     ILSrcMod    src0_reg_rel_mod;
 }
 //===--------------------------------------------------------------------===//
 // Set of classes that have two input parameters and one output parameter.
 // The basic pattern for this class is "Opcode Dst, Src0, Src1" and
 // handles the binary math operators and comparison operations.
 // It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
 // if the addressing is register relative for input register 1.
 // The destination and source operand 0 tokens are inherited from OneInOneOut.
 //===--------------------------------------------------------------------===//
 class TwoInOneOut<ILOpCode op, dag outs, dag ins,
      string asmstr, list<dag> pattern>
      : OneInOneOut<op, outs, ins, asmstr, pattern>
 {
     ILSrc       src1_reg;
     ILSrcMod    src1_mod;
     ILRelAddr   src1_rel;
     ILSrc       src1_reg_rel;
     ILSrcMod    src1_reg_rel_mod;
 }
 //===--------------------------------------------------------------------===//
 // Similar to the UnaryOpNoRet class, but takes as arguments two input
 // operands. Used mainly for barrier instructions on PC platform.
 // The source operand 0 tokens are inherited from UnaryOpNoRet.
 //===--------------------------------------------------------------------===//
 class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
      string asmstr, list<dag> pattern>
      : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
 {
     ILSrc       src1_reg;
     ILSrcMod    src1_mod;
     ILRelAddr   src1_rel;
     ILSrc       src1_reg_rel;
     ILSrcMod    src1_reg_rel_mod;
 }
 //===--------------------------------------------------------------------===//
 // Set of classes that have three input parameters and one output parameter.
 // The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
 // handles the mad and conditional mov instruction.
 // It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
 // if the addressing is register relative.
 // This class is the parent class of TernaryOp
 // The destination and source operand 0/1 tokens are inherited from
 // TwoInOneOut.
 //===--------------------------------------------------------------------===//
 class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
      string asmstr, list<dag> pattern>
      : TwoInOneOut<op, outs, ins, asmstr, pattern> {
           ILSrc       src2_reg;
           ILSrcMod    src2_mod;
           ILRelAddr   src2_rel;
           ILSrc       src2_reg_rel;
           ILSrcMod    src2_reg_rel_mod;
      }
 //===--------------------------------------------------------------------===//
 // Intrinsic classes
 // Generic versions of the above classes but for Target specific intrinsics
 // instead of SDNode patterns.
 //===--------------------------------------------------------------------===//
 let TargetPrefix = "AMDIL", isTarget = 1 in {
     // Intrinsics that take no arguments. Note that VoidIntBool has the same
     // i32 result type as VoidIntInt.
     class VoidIntLong :
          Intrinsic<[llvm_i64_ty], [], []>;
     class VoidIntInt :
          Intrinsic<[llvm_i32_ty], [], []>;
     class VoidIntBool :
          Intrinsic<[llvm_i32_ty], [], []>;
     // Value-producing intrinsics: every operand has the overloaded result
     // type, and none of them access memory.
     class UnaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
     class UnaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
     // Conversions between independently overloaded float and int types.
     class ConvertIntFTOI :
          Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
     class ConvertIntITOF :
          Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
     // Intrinsics without a result carry no memory properties.
     class UnaryIntNoRetInt :
          Intrinsic<[], [llvm_anyint_ty], []>;
     class UnaryIntNoRetFloat :
          Intrinsic<[], [llvm_anyfloat_ty], []>;
     class BinaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class BinaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class BinaryIntNoRetInt :
          Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
     class BinaryIntNoRetFloat :
          Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
     class TernaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class TernaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class QuaternaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     // Atomic intrinsics: a pointer followed by i32 operands. All of them are
     // marked IntrReadWriteArgMem; TernaryAtomicInt previously omitted the
     // property, unlike its NoRet counterpart.
     class UnaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class BinaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class TernaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class UnaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class BinaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class TernaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
 }
--- a/lib/Target/AMDGPU/AMDILFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDILFrameLowering.cpp
@ -0,0 +1,53 @@
 //===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface to describe a layout of a stack frame on an AMDIL target machine
 //
 //===----------------------------------------------------------------------===//
 #include "AMDILFrameLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 using namespace llvm;
 // Forwards every argument unchanged to the TargetFrameLowering base class.
 AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
                                        int LAO, unsigned TransAl)
   : TargetFrameLowering(D, StackAl, LAO, TransAl) {
 }
 // No resources are owned by this class; nothing to release.
 AMDILFrameLowering::~AMDILFrameLowering() {
 }
 /// getFrameIndexOffset - Returns the displacement from the frame register to
 /// the stack frame of the specified index.
 int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                         int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return MFI->getObjectOffset(FI);
 }
 // Reports that there are no callee-saved spill slots: NumEntries is set to
 // zero and a null table pointer is returned.
 const TargetFrameLowering::SpillSlot *
 AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
   NumEntries = 0;
   return 0;
 }
 // Intentionally empty: no prologue code is emitted.
 void AMDILFrameLowering::emitPrologue(MachineFunction &MF) const {
 }
 // Intentionally empty: no epilogue code is emitted.
 void AMDILFrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
 }
 // Always reports that the function has no frame pointer.
 bool AMDILFrameLowering::hasFP(const MachineFunction &MF) const {
   return false;
 }
--- a/lib/Target/AMDGPU/AMDILFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDILFrameLowering.h
@ -0,0 +1,46 @@
 //===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Interface to describe a layout of a stack frame on an AMDIL target machine
 //
 //===----------------------------------------------------------------------===//
 #ifndef _AMDILFRAME_LOWERING_H_
 #define _AMDILFRAME_LOWERING_H_
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Target/TargetFrameLowering.h"
 /// Information about the stack frame layout on the AMDIL targets. It holds
 /// the direction of the stack growth, the known stack alignment on entry to
 /// each function, and the offset to the locals area.
 /// See TargetFrameLowering (the base class) for more comments.
 namespace llvm {
  class AMDILFrameLowering : public TargetFrameLowering {
    public:
      // All arguments are forwarded to TargetFrameLowering; TransAl defaults
      // to 1.
      AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
          TransAl = 1);
      virtual ~AMDILFrameLowering();
      // Returns the offset recorded in MachineFrameInfo for frame index FI.
      virtual int getFrameIndexOffset(const MachineFunction &MF,
                                         int FI) const;
      // Returns the table of callee-saved spill slots and stores its length
      // in NumEntries.
      virtual const SpillSlot *
        getCalleeSavedSpillSlots(unsigned &NumEntries) const;
      virtual void emitPrologue(MachineFunction &MF) const;
      virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
      virtual bool hasFP(const MachineFunction &MF) const;
  }; // class AMDILFrameLowering
 } // namespace llvm
 #endif // _AMDILFRAME_LOWERING_H_
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@ -0,0 +1,393 @@
 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file defines an instruction selector for the AMDIL target.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDILDevices.h"
 #include "AMDILUtilityFunctions.h"
 #include "llvm/ADT/ValueMap.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Compiler.h"
 #include <list>
 #include <queue>
 using namespace llvm;
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
 // for SelectionDAG operations.
 //
 namespace {
 class AMDILDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDILSubtarget &Subtarget;
 public:
  AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
  virtual ~AMDILDAGToDAGISel();
  // Selects a machine node for N; returns NULL when N is already a machine
  // node.
  SDNode *Select(SDNode *N);
  virtual const char *getPassName() const;
 private:
  // Wraps Imm in an i32 target constant.
  inline SDValue getSmallIPtrImm(unsigned Imm);
  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
  // Address-space helpers: checkType compares the address space of ptr's
  // pointer type with addrspace; the is*Store / is*Load predicates below are
  // built on top of it.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static const Value *getBasePointerValue(const Value *V);
  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);
  static bool isCPLoad(const LoadSDNode *N);
  static bool isConstantLoad(const LoadSDNode *N, int cbID);
  static bool isGlobalLoad(const LoadSDNode *N);
  static bool isPrivateLoad(const LoadSDNode *N);
  static bool isLocalLoad(const LoadSDNode *N);
  static bool isRegionLoad(const LoadSDNode *N);
  // Base + offset address selectors.
  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
 };
 }  // end anonymous namespace
 // createAMDILISelDag - Allocates the AMDIL instruction selector pass, which
 // converts a legalized DAG into an AMDIL-specific DAG, ready for instruction
 // scheduling. The returned pass is heap-allocated with new.
 //
 FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM
                                        AMDIL_OPT_LEVEL_DECL)
 {
   FunctionPass *ISelPass = new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
   return ISelPass;
 }
 // Initializes the SelectionDAGISel base and caches a reference to the AMDIL
 // subtarget obtained from the target machine.
 AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM
                                      AMDIL_OPT_LEVEL_DECL)
   : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR),
     Subtarget(TM.getSubtarget<AMDILSubtarget>()) {
 }
 // No resources are owned by this class; nothing to release.
 AMDILDAGToDAGISel::~AMDILDAGToDAGISel()
 {
 }
 // Wraps Imm in an i32 target constant node of the current DAG.
 SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm)
 {
   SDValue ImmNode = CurDAG->getTargetConstant(Imm, MVT::i32);
   return ImmNode;
 }
 // Splits Addr into a (R1, R2) base/offset pair using i32 values and always
 // reports a match:
 //   * ADD         -> R1/R2 are the two operands of the add.
 //   * FrameIndex  -> R1 is the matching target frame index, R2 is zero.
 //   * otherwise   -> R1 is Addr itself, R2 is zero.
 bool AMDILDAGToDAGISel::SelectADDRParam(
     SDValue Addr, SDValue& R1, SDValue& R2)
 {
   const unsigned AddrOpc = Addr.getOpcode();
   if (AddrOpc == ISD::ADD) {
     R1 = Addr.getOperand(0);
     R2 = Addr.getOperand(1);
     return true;
   }
   FrameIndexSDNode *FrameIdx = NULL;
   if (AddrOpc == ISD::FrameIndex) {
     FrameIdx = dyn_cast<FrameIndexSDNode>(Addr);
   }
   if (FrameIdx) {
     R1 = CurDAG->getTargetFrameIndex(FrameIdx->getIndex(), MVT::i32);
   } else {
     R1 = Addr;
   }
   R2 = CurDAG->getTargetConstant(0, MVT::i32);
   return true;
 }
 // Rejects target external symbols and target global addresses; every other
 // address is handed to SelectADDRParam.
 bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2)
 {
   switch (Addr.getOpcode()) {
   case ISD::TargetExternalSymbol:
   case ISD::TargetGlobalAddress:
     return false;
   default:
     return SelectADDRParam(Addr, R1, R2);
   }
 }
 // 64-bit counterpart of SelectADDR: rejects target external symbols and
 // target global addresses, otherwise splits Addr into a (R1, R2) pair using
 // i64 values:
 //   * ADD         -> R1/R2 are the two operands of the add.
 //   * FrameIndex  -> R1 is the matching target frame index, R2 is zero.
 //   * otherwise   -> R1 is Addr itself, R2 is zero.
 bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2)
 {
   const unsigned AddrOpc = Addr.getOpcode();
   if (AddrOpc == ISD::TargetExternalSymbol ||
       AddrOpc == ISD::TargetGlobalAddress) {
     return false;
   }
   if (AddrOpc == ISD::ADD) {
     R1 = Addr.getOperand(0);
     R2 = Addr.getOperand(1);
     return true;
   }
   FrameIndexSDNode *FrameIdx = NULL;
   if (AddrOpc == ISD::FrameIndex) {
     FrameIdx = dyn_cast<FrameIndexSDNode>(Addr);
   }
   if (FrameIdx) {
     R1 = CurDAG->getTargetFrameIndex(FrameIdx->getIndex(), MVT::i64);
   } else {
     R1 = Addr;
   }
   R2 = CurDAG->getTargetConstant(0, MVT::i64);
   return true;
 }
 // Entry point of instruction selection for a single node. Nodes that are
 // already machine nodes yield NULL. FrameIndex nodes are turned into a COPY
 // of the corresponding i32 target frame index; everything else goes through
 // the generated SelectCode matcher.
 SDNode *AMDILDAGToDAGISel::Select(SDNode *N)
 {
   if (N->isMachineOpcode()) {
     return NULL;   // Already selected.
   }
   if (N->getOpcode() == ISD::FrameIndex) {
     if (FrameIndexSDNode *FrameIdx = dyn_cast<FrameIndexSDNode>(N)) {
       const EVT ResultVT = N->getValueType(0);
       SDValue TargetFI =
           CurDAG->getTargetFrameIndex(FrameIdx->getIndex(), MVT::i32);
       return CurDAG->SelectNodeTo(N, AMDGPU::COPY, ResultVT, TargetFI);
     }
   }
   return SelectCode(N);
 }
 bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
  if (!ptr) {
    return false;
  }
  Type *ptrType = ptr->getType();
  return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
 }
 // Breadth-first walk from V through instruction operands (and operand 0 of
 // constant expressions) looking for the value the pointer originates from.
 // The first pointer-typed Argument, GlobalVariable or AllocaInst reached is
 // returned; NULL is returned when V is null or nothing of that kind is found.
 // Each value is expanded at most once.
 const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
 {
   if (!V) {
     return NULL;
   }
   ValueMap<const Value *, bool> Visited;
   std::queue<const Value *, std::list<const Value *> > Worklist;
   Worklist.push(V);
   while (!Worklist.empty()) {
     const Value *Cur = Worklist.front();
     Worklist.pop();
     if (Visited.find(Cur) != Visited.end()) {
       continue;
     }
     Visited[Cur] = true;
     if (isa<Argument>(Cur) && isa<PointerType>(Cur->getType())) {
       return Cur;
     }
     if (isa<GlobalVariable>(Cur)) {
       return Cur;
     }
     if (isa<Constant>(Cur)) {
       // Only constant expressions are looked through, and only via their
       // first operand.
       if (const ConstantExpr *Expr = dyn_cast<ConstantExpr>(Cur)) {
         Worklist.push(Expr->getOperand(0));
       }
       continue;
     }
     if (isa<AllocaInst>(Cur)) {
       return Cur;
     }
     if (const Instruction *Inst = dyn_cast<Instruction>(Cur)) {
       const uint32_t OperandCount = Inst->getNumOperands();
       for (uint32_t Idx = 0; Idx != OperandCount; ++Idx) {
         Worklist.push(Inst->getOperand(Idx));
       }
     }
     // Any other kind of value is silently ignored.
   }
   return NULL;
 }
 bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
 }
 // A store counts as private when its source value is in none of the local,
 // global or region address spaces.
 bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N)
 {
   const bool InOtherSpace =
       checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
   return !InOtherSpace;
 }
 bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
 }
 bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
 }
 bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
  if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
    return true;
  }
  MachineMemOperand *MMO = N->getMemOperand();
  const Value *V = MMO->getValue();
  const Value *BV = getBasePointerValue(V);
  if (MMO
      && MMO->getValue()
      && ((V && dyn_cast<GlobalValue>(V))
          || (BV && dyn_cast<GlobalValue>(
                        getBasePointerValue(MMO->getValue()))))) {
    return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
  } else {
    return false;
  }
 }
 bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
 }
 bool AMDILDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
 }
 bool AMDILDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
 }
 // True when load N is a constant-pool load: its source value is in the
 // private address space and its memory operand's value is the constant-pool
 // pseudo source value.
 bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
   if (!checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
     return false;
   }
   MachineMemOperand *MMO = N->getMemOperand();
   if (!MMO) {
     return false;
   }
   // Nothing in this function guarantees that the memory operand carries a
   // value, and dyn_cast must not be handed a null pointer.
   const PseudoSourceValue *PSV =
       dyn_cast_or_null<PseudoSourceValue>(MMO->getValue());
   return PSV && PSV == PseudoSourceValue::getConstantPool();
 }
 // A load counts as private when it is neither a constant-pool/constant load
 // marked with the private address space nor a load from the local, global,
 // region, constant, PARAM_D or PARAM_I address spaces.
 bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N)
 {
   // Constant pool loads and constant loads may be tagged as private; those
   // must not be treated as private loads.
   const bool TaggedPrivate =
       checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
   if (TaggedPrivate && (isCPLoad(N) || isConstantLoad(N, -1))) {
     return false;
   }
   const bool InOtherSpace =
       checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
       || checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS);
   return !InOtherSpace;
 }
 // Human-readable name of this pass.
 const char *AMDILDAGToDAGISel::getPassName() const
 {
   return "AMDIL DAG->DAG Pattern Instruction Selection";
 }
 #ifdef DEBUGTMP
 #undef INT64_C
 #endif
 #undef DEBUGTMP
 ///==== AMDGPU Functions ====///
 // Tries to split Addr into a base pointer plus a small constant offset
 // expressed in dwords.
 //   * Target external symbols / global addresses never match.
 //   * For an ADD, each non-constant operand becomes Base and a constant
 //     operand whose dword value passes isInt<8> becomes Offset; the match
 //     succeeds only if such a constant was found.
 //   * Any other address matches with Base = Addr and Offset = 0.
 bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                             SDValue& Offset) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  if (Addr.getOpcode() == ISD::ADD) {
    bool Match = false;
    // Find the base ptr and the offset
    // NOTE(review): if both operands are constants, Base is never assigned
    // even though Match may become true -- confirm this cannot happen here.
    for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
      SDValue Arg = Addr.getOperand(i);
      ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
      // This arg isn't a constant so it must be the base PTR.
      if (!OffsetNode) {
        Base = Addr.getOperand(i);
        continue;
      }
      // Check if the constant argument fits in 8-bits.  The offset is in bytes
      // so we need to convert it to dwords.
      // NOTE(review): the value is read zero-extended but tested with the
      // signed isInt<8>; confirm which signedness is intended.
      if (isInt<8>(OffsetNode->getZExtValue() >> 2)) {
        Match = true;
        Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
                                           MVT::i32);
      }
    }
    return Match;
  }
  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
 }
 // Splits Addr into Base + Offset and always reports a match:
 //   * ADD whose second operand is a constant passing isInt<16>: Base is the
 //     first operand and Offset is that constant.
 //   * Addr itself is a constant passing isInt<16>: Base is a copy from the
 //     AMDGPU::ZERO register and Offset is the constant.
 //   * Otherwise: Base is Addr and Offset is zero.
 bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset)
 {
   if (Addr.getOpcode() == ISD::ADD) {
     ConstantSDNode *AddImm = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
     if (AddImm && isInt<16>(AddImm->getZExtValue())) {
       Base = Addr.getOperand(0);
       Offset = CurDAG->getTargetConstant(AddImm->getZExtValue(), MVT::i32);
       return true;
     }
   }
   // If the pointer address is constant, we can move it to the offset field.
   if (ConstantSDNode *AddrImm = dyn_cast<ConstantSDNode>(Addr)) {
     if (isInt<16>(AddrImm->getZExtValue())) {
       Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                     CurDAG->getEntryNode().getDebugLoc(),
                                     AMDGPU::ZERO, MVT::i32);
       Offset = CurDAG->getTargetConstant(AddrImm->getZExtValue(), MVT::i32);
       return true;
     }
   }
   // Default case, no offset
   Base = Addr;
   Offset = CurDAG->getTargetConstant(0, MVT::i32);
   return true;
 }
 // Matches a register + register address: when Addr is an ADD, Base and
 // Offset receive its two operands and the match succeeds. Every other kind of
 // address (including target external symbols and target global addresses,
 // which are not ADD nodes) is rejected without touching Base or Offset.
 bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
                                       SDValue& Offset) {
   if (Addr.getOpcode() != ISD::ADD) {
     return false;
   }
   Base = Addr.getOperand(0);
   Offset = Addr.getOperand(1);
   // Report success; this used to return false even after filling in Base and
   // Offset, so the pattern could never match.
   return true;
 }
--- a/lib/Target/AMDGPU/AMDILISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDILISelLowering.cpp
--- a/lib/Target/AMDGPU/AMDILISelLowering.h
+++ b/lib/Target/AMDGPU/AMDILISelLowering.h
@ -0,0 +1,203 @@
 //===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file defines the interfaces that AMDIL uses to lower LLVM code into a
 // selection DAG.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDIL_ISELLOWERING_H_
 #define AMDIL_ISELLOWERING_H_
 #include "AMDIL.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 namespace llvm
 {
  // AMDIL-specific SelectionDAG node opcodes. Numbering starts at
  // ISD::BUILTIN_OP_END; LAST_ISD_NUMBER marks the end of the list.
  namespace AMDILISD
  {
    enum
    {
      FIRST_NUMBER = ISD::BUILTIN_OP_END,
      CMOVLOG,     // 32bit FP Conditional move logical instruction
      MAD,         // 32bit Fused Multiply Add instruction
      VBUILD,      // scalar to vector mov instruction
      CALL,        // Function call based on a single integer
      SELECT_CC,   // Select the correct conditional instruction
      UMUL,        // 32bit unsigned multiplication
      DIV_INF,      // Divide with infinity returned on zero divisor
      CMP,
      IL_CC_I_GT,
      IL_CC_I_LT,
      IL_CC_I_GE,
      IL_CC_I_LE,
      IL_CC_I_EQ,
      IL_CC_I_NE,
      RET_FLAG,
      BRANCH_COND,
      LAST_ISD_NUMBER
    };
  } // AMDILISD
  class MachineBasicBlock;
  class MachineInstr;
  class DebugLoc;
  class TargetInstrInfo;
  // TargetLowering implementation for AMDIL: declares the hooks used to lower
  // LLVM code into a selection DAG, plus private helpers for individual
  // operations.
  class AMDILTargetLowering : public TargetLowering
  {
    public:
      AMDILTargetLowering(TargetMachine &TM);
      virtual SDValue
        LowerOperation(SDValue Op, SelectionDAG &DAG) const;
      /// computeMaskedBitsForTargetNode - Determine which of the bits
      /// specified in Mask are known to be either zero or one and return them
      /// in the KnownZero/KnownOne bitsets.
      virtual void
        computeMaskedBitsForTargetNode(
            const SDValue Op,
            APInt &KnownZero,
            APInt &KnownOne,
            const SelectionDAG &DAG,
            unsigned Depth = 0
            ) const;
      virtual bool 
        getTgtMemIntrinsic(IntrinsicInfo &Info,
                                  const CallInst &I, unsigned Intrinsic) const;
      virtual const char*
        getTargetNodeName(
            unsigned Opcode
            ) const;
      // We want to mark f32/f64 floating point values as
      // legal
      bool
        isFPImmLegal(const APFloat &Imm, EVT VT) const;
      // We don't want to shrink f64/f32 constants because
      // they both take up the same amount of space and
      // we don't want to use a f2d instruction.
      bool ShouldShrinkFPConstant(EVT VT) const;
      /// getFunctionAlignment - Return the Log2 alignment of this
      /// function.
      virtual unsigned int
        getFunctionAlignment(const Function *F) const;
    private:
      // Calling-convention and argument/return lowering helpers.
      CCAssignFn*
        CCAssignFnForNode(unsigned int CC) const;
      SDValue LowerCallResult(SDValue Chain,
          SDValue InFlag,
          CallingConv::ID CallConv,
          bool isVarArg,
          const SmallVectorImpl<ISD::InputArg> &Ins,
          DebugLoc dl,
          SelectionDAG &DAG,
          SmallVectorImpl<SDValue> &InVals) const;
      SDValue LowerMemArgument(SDValue Chain,
          CallingConv::ID CallConv,
          const SmallVectorImpl<ISD::InputArg> &ArgInfo,
          DebugLoc dl, SelectionDAG &DAG,
          const CCValAssign &VA,  MachineFrameInfo *MFI,
          unsigned i) const;
      SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
          SDValue Arg,
          DebugLoc dl, SelectionDAG &DAG,
          const CCValAssign &VA,
          ISD::ArgFlagsTy Flags) const;
      virtual SDValue
        LowerFormalArguments(SDValue Chain,
            CallingConv::ID CallConv, bool isVarArg,
            const SmallVectorImpl<ISD::InputArg> &Ins,
            DebugLoc dl, SelectionDAG &DAG,
            SmallVectorImpl<SDValue> &InVals) const;
      virtual SDValue
        LowerCall(CallLoweringInfo &CLI,
        SmallVectorImpl<SDValue> &InVals) const;
      virtual SDValue
        LowerReturn(SDValue Chain,
            CallingConv::ID CallConv, bool isVarArg,
            const SmallVectorImpl<ISD::OutputArg> &Outs,
            const SmallVectorImpl<SDValue> &OutVals,
            DebugLoc dl, SelectionDAG &DAG) const;
      // Per-operation lowering helpers; each takes the node to lower and the
      // DAG it belongs to.
      SDValue
        LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSREM(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
      // Builds an integer EVT from a size and an element count; defaults are
      // size 32 and a single element.
      EVT
        genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
      SDValue
        LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
      SDValue
        LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  }; // AMDILTargetLowering
 } // end namespace llvm
 #endif    // AMDIL_ISELLOWERING_H_
--- a/lib/Target/AMDGPU/AMDILInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDILInstrInfo.cpp
@ -0,0 +1,508 @@
 //===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file contains the AMDIL implementation of the TargetInstrInfo class.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDILInstrInfo.h"
 #include "AMDIL.h"
 #include "AMDILISelLowering.h"
 #include "AMDILUtilityFunctions.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Instructions.h"
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenInstrInfo.inc"
 using namespace llvm;
 // Constructs the instruction info. The register-info member RI is built from
 // the target machine and a reference to this object.
 AMDILInstrInfo::AMDILInstrInfo(TargetMachine &tm)
  : AMDILGenInstrInfo(),
    RI(tm, *this) {
 }
 // Returns the register info object owned by this instruction info.
 const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
  return RI;
 }
 // Stub: never reports MI as a coalescable extension instruction. SrcReg,
 // DstReg and SubIdx are left unmodified.
 bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                           unsigned &SrcReg, unsigned &DstReg,
                                           unsigned &SubIdx) const {
 // TODO: Implement this function
  return false;
 }
 // Stub: always returns 0 and leaves FrameIndex unmodified, i.e. no
 // instruction is ever recognised as a stack-slot load here.
 unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                             int &FrameIndex) const {
 // TODO: Implement this function
  return 0;
 }
 // Stub: always returns 0 and leaves FrameIndex unmodified.
 unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                   int &FrameIndex) const {
 // TODO: Implement this function
  return 0;
 }
 // Stub: always returns false; MMO and FrameIndex are left unmodified.
 bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                          const MachineMemOperand *&MMO,
                                          int &FrameIndex) const {
 // TODO: Implement this function
  return false;
 }
 // Stub: always returns 0 and leaves FrameIndex unmodified, i.e. no
 // instruction is ever recognised as a stack-slot store here.
 unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
 // TODO: Implement this function
  return 0;
 }
 // Stub: always returns 0 and leaves FrameIndex unmodified.
 unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
 // TODO: Implement this function
  return 0;
 }
 // Stub: always returns false; MMO and FrameIndex are left unmodified.
 bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
                                           const MachineMemOperand *&MMO,
                                           int &FrameIndex) const {
 // TODO: Implement this function
  return false;
 }
 // Stub: performs no conversion and always returns NULL. Neither iterator
 // argument is advanced or otherwise touched.
 MachineInstr *
 AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const {
 // TODO: Implement this function
  return NULL;
 }
 // Advances iter until it refers to a BRANCH or BRANCH_COND_* instruction.
 // Returns true with iter left on that instruction, or false with iter equal
 // to MBB.end() when no branch is found.
 bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
                                        MachineBasicBlock &MBB) const {
  for (; iter != MBB.end(); ++iter) {
    switch (iter->getOpcode()) {
      ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    case AMDGPU::BRANCH:
      return true;
    default:
      // Not a branch: keep scanning.
      break;
    }
  }
  return false;
 }
 // Branch analysis hook. As written this returns true immediately without
 // inspecting MBB or touching TBB, FBB or Cond.
 // NOTE(review): under the usual TargetInstrInfo contract a true result means
 // "could not analyze" -- confirm against TargetInstrInfo.h.
 //
 // Everything after the early return is unreachable. The disabled code looks
 // for up to two branch instructions in the block, fills in TBB/FBB/Cond from
 // them, and erases unconditional branches.
 // NOTE(review): the disabled code erases instructions without consulting
 // AllowModify; revisit before re-enabling it.
 bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  bool retVal = true;
  return retVal;
  // ---- Unreachable from here on (see the header comment). ----
  MachineBasicBlock::iterator iter = MBB.begin();
  if (!getNextBranchInstr(iter, MBB)) {
    // No branch at all in the block.
    retVal = false;
  } else {
    MachineInstr *firstBranch = iter;
    if (!getNextBranchInstr(++iter, MBB)) {
      // Exactly one branch in the block.
      if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
        TBB = firstBranch->getOperand(0).getMBB();
        firstBranch->eraseFromParent();
        retVal = false;
      } else {
        // Single conditional branch: the false target is taken from the
        // successor list, picking whichever of the first two differs from TBB.
        TBB = firstBranch->getOperand(0).getMBB();
        FBB = *(++MBB.succ_begin());
        if (FBB == TBB) {
          FBB = *(MBB.succ_begin());
        }
        Cond.push_back(firstBranch->getOperand(1));
        retVal = false;
      }
    } else {
      MachineInstr *secondBranch = iter;
      if (!getNextBranchInstr(++iter, MBB)) {
        // Exactly two branches: conditional followed by unconditional.
        if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
          TBB = firstBranch->getOperand(0).getMBB();
          Cond.push_back(firstBranch->getOperand(1));
          FBB = secondBranch->getOperand(0).getMBB();
          secondBranch->eraseFromParent();
          retVal = false;
        } else {
          assert(0 && "Should not have two consecutive conditional branches");
        }
      } else {
        MBB.getParent()->viewCFG();
        assert(0 && "Should not have three branch instructions in"
               " a single basic block");
        retVal = false;
      }
    }
  }
  return retVal;
 }
 unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();
  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
 }
 // Appends branch instructions to the end of MBB and returns how many
 // instructions were added.
 //
 //  - FBB == 0, Cond empty : one unconditional BRANCH to TBB.
 //  - FBB == 0, Cond set   : one conditional branch to TBB on Cond[0].
 //  - FBB != 0             : a conditional branch to TBB followed by an
 //                           unconditional BRANCH to FBB. This form is flagged
 //                           as unsupported by an assertion; when assertions
 //                           are compiled out the two instructions are emitted
 //                           and the count returned matches what was emitted.
 //
 // The conditional opcode is chosen by getBranchInstr(Cond[0]).
 unsigned int
 AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB,
                             const SmallVectorImpl<MachineOperand> &Cond,
                             DebugLoc DL) const
 {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  // The unconditional debug dump of every condition operand's parent
  // instruction was removed: it printed on every call and dereferenced
  // Cond[x].getParent(), which need not refer to a live instruction.
  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
    } else {
      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
        .addMBB(TBB).addReg(Cond[0].getReg());
    }
    return 1;
  }
  // Two-way branch: Cond[0] is read below, so it must exist.
  assert(!Cond.empty() && "Two-way branch requires a condition");
  BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
    .addMBB(TBB).addReg(Cond[0].getReg());
  BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
  assert(0 && "Inserting two branches not supported");
  // Report the two instructions emitted above (previously 0 was returned).
  return 2;
 }
 // Erases up to two trailing branch instructions from MBB and returns the
 // number erased (0, 1 or 2). The last instruction may be a BRANCH or a
 // BRANCH_COND_*; the one before it is erased only if it is a BRANCH_COND_*.
 unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  // First pass: the block's final instruction.
  MachineBasicBlock::iterator tail = MBB.end();
  if (tail == MBB.begin()) {
    return 0;
  }
  --tail;
  switch (tail->getOpcode()) {
    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
  case AMDGPU::BRANCH:
    tail->eraseFromParent();
    break;
  default:
    return 0;
  }
  // Second pass: whatever is now the final instruction.
  tail = MBB.end();
  if (tail == MBB.begin()) {
    return 1;
  }
  --tail;
  switch (tail->getOpcode()) {
    // FIXME: only the conditional forms are handled here; a second
    // unconditional BRANCH is left in place.
    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    tail->eraseFromParent();
    break;
  default:
    return 1;
  }
  return 2;
 }
 // Walks backwards from the end of MBB over any trailing ENDLOOP / ENDIF /
 // ELSE instructions and returns an iterator to the first instruction of that
 // trailing run, i.e. the position just after the last instruction that is
 // not one of those opcodes. Returns MBB->end() for an empty block, and
 // MBB->begin() when the whole block consists of such instructions.
 MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator tmp = MBB->end();
  if (!MBB->size()) {
    return MBB->end();
  }
  // NOTE(review): the loop condition relies on the iterator converting to a
  // truth value after the decrement; every path in the body returns before
  // the iterator could be moved in front of begin().
  while (--tmp) {
    if (tmp->getOpcode() == AMDGPU::ENDLOOP
        || tmp->getOpcode() == AMDGPU::ENDIF
        || tmp->getOpcode() == AMDGPU::ELSE) {
      if (tmp == MBB->begin()) {
        // Reached the front while still inside the flow-control run.
        return tmp;
      } else {
        continue;
      }
    }  else {
      // First non flow-control instruction: the run starts right after it.
      return ++tmp;
    }
  }
  return MBB->end();
 }
 // Emits a private-memory store of SrcReg to the stack slot FrameIndex,
 // inserted before MI in MBB.
 //
 // The opcode is chosen from the register class: PRIVATESTORE_f32 for GPRF32
 // and PRIVATESTORE_i32 for GPRI32. For any other class Opc stays 0.
 // NOTE(review): no diagnostic is raised for other register classes; confirm
 // that only these two classes can reach this hook.
 //
 // The new instruction carries SrcReg (with the kill flag when isKill is
 // set), the frame index, an immediate 0, and a memory operand describing the
 // fixed stack slot. It takes MI's debug location when MI is not MBB.end().
 // TRI is unused.
 void
 AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill,
                                     int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const {
  unsigned int Opc = 0;
  MachineFunction &MF = *(MBB.getParent());
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  switch (RC->getID()) {
  case AMDGPU::GPRF32RegClassID:
    Opc = AMDGPU::PRIVATESTORE_f32;
    break;
  case AMDGPU::GPRI32RegClassID:
    Opc = AMDGPU::PRIVATESTORE_i32;
    break;
  }
  DebugLoc DL;
  if (MI != MBB.end()) {
    DL = MI->getDebugLoc();
  }
  // The access is a store, so it is flagged MOStore (it was MOLoad before).
  // The operand is allocated through the MachineFunction so that the function
  // owns it, instead of a raw 'new' that nothing ever released.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FrameIndex),
                          MachineMemOperand::MOStore,
                          MFI.getObjectSize(FrameIndex),
                          MFI.getObjectAlignment(FrameIndex));
  BuildMI(MBB, MI, DL, get(Opc))
    .addReg(SrcReg, getKillRegState(isKill))
    .addFrameIndex(FrameIndex)
    .addMemOperand(MMO)
    .addImm(0);
 }
 // Emits a private-memory load from the stack slot FrameIndex into DestReg,
 // inserted before MI in MBB.
 //
 // The opcode is chosen from the register class: PRIVATELOAD_f32 for GPRF32
 // and PRIVATELOAD_i32 for GPRI32. For any other class Opc stays 0.
 // NOTE(review): no diagnostic is raised for other register classes; confirm
 // that only these two classes can reach this hook.
 //
 // The new instruction defines DestReg and carries the frame index, an
 // immediate 0, and a memory operand describing the fixed stack slot. It
 // takes MI's debug location when MI is not MBB.end(). TRI is unused.
 void
 AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned DestReg, int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  unsigned int Opc = 0;
  MachineFunction &MF = *(MBB.getParent());
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  switch (RC->getID()) {
  case AMDGPU::GPRF32RegClassID:
    Opc = AMDGPU::PRIVATELOAD_f32;
    break;
  case AMDGPU::GPRI32RegClassID:
    Opc = AMDGPU::PRIVATELOAD_i32;
    break;
  }
  DebugLoc DL;
  if (MI != MBB.end()) {
    DL = MI->getDebugLoc();
  }
  // Allocate the operand through the MachineFunction so that the function
  // owns it, instead of a raw 'new' that nothing ever released.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FrameIndex),
                          MachineMemOperand::MOLoad,
                          MFI.getObjectSize(FrameIndex),
                          MFI.getObjectAlignment(FrameIndex));
  BuildMI(MBB, MI, DL, get(Opc))
    .addReg(DestReg, RegState::Define)
    .addFrameIndex(FrameIndex)
    .addMemOperand(MMO)
    .addImm(0);
 }
 // Frame-index flavour of memory-operand folding. Not implemented: nothing is
 // folded and a null instruction pointer is returned.
 MachineInstr *
 AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       int FrameIndex) const {
  // TODO: Implement this function
  return NULL;
 }
 // Load-instruction flavour of memory-operand folding. Not implemented:
 // nothing is folded and a null instruction pointer is returned.
 MachineInstr *
 AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       MachineInstr *LoadMI) const {
  // TODO: Implement this function
  return NULL;
 }
 // Stub: always returns false, i.e. no memory operand is ever reported as
 // foldable.
 bool
 AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops) const
 {
  // TODO: Implement this function
  return false;
 }
 // MachineInstr flavour of memory-operand unfolding. Stub: always returns
 // false and adds nothing to NewMIs.
 bool
 AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                  unsigned Reg, bool UnfoldLoad,
                                  bool UnfoldStore,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // TODO: Implement this function
  return false;
 }
 // SelectionDAG flavour of memory-operand unfolding. Stub: always returns
 // false and adds nothing to NewNodes.
 bool
 AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                     SmallVectorImpl<SDNode*> &NewNodes) const {
  // TODO: Implement this function
  return false;
 }
 // Stub: always returns 0 and never writes through LoadRegIndex.
 unsigned
 AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                            bool UnfoldLoad, bool UnfoldStore,
                                            unsigned *LoadRegIndex) const {
  // TODO: Implement this function
  return 0;
 }
 // Scheduling heuristic: two loads are kept together when fewer than 16 loads
 // are already grouped and their offsets lie less than 16 apart. Load1 and
 // Load2 themselves are not inspected. Offset2 must be greater than Offset1.
 bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                              int64_t Offset1, int64_t Offset2,
                                              unsigned NumLoads) const {
  assert(Offset2 > Offset1
         && "Second offset should be larger than first offset!");
  // Too many loads in the run already: do not grow it further.
  if (NumLoads >= 16) {
    return false;
  }
  // TODO: Make the loads schedule near if it fits in a cacheline
  const int64_t distance = Offset2 - Offset1;
  return distance < 16;
 }
 // Stub: leaves Cond unmodified and always returns true.
 // NOTE(review): under the usual TargetInstrInfo contract a true result means
 // the condition could not be reversed -- confirm against TargetInstrInfo.h.
 bool
 AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
  const {
  // TODO: Implement this function
  return true;
 }
 // Stub: inserts nothing; MBB is left unchanged.
 void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const {
  // TODO: Implement this function
 }
 // Stub: always returns false, i.e. no instruction is reported as predicated.
 bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
  // TODO: Implement this function
  return false;
 }
 // Stub: always returns false; neither predicate list is inspected.
 bool
 AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                   const SmallVectorImpl<MachineOperand> &Pred2)
  const {
  // TODO: Implement this function
  return false;
 }
 // Stub: always returns false and adds nothing to Pred.
 bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
                                       std::vector<MachineOperand> &Pred) const {
  // TODO: Implement this function
  return false;
 }
 // Reports whether MI can be predicated by forwarding to the isPredicable()
 // query on the instruction's descriptor.
 bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const {
  // TODO: Add target-specific checks here if the descriptor flag alone turns
  // out not to be sufficient.
  return MI->getDesc().isPredicable();
 }
 // Stub: always returns true regardless of the register class passed in.
 bool
 AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // TODO: Implement this function
  return true;
 }
 // Name-based load test: true when the opcode's name contains "LOAD" but does
 // not contain "LOADCONST".
 bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  const bool isConstLoad = strstr(opName, "LOADCONST") != NULL;
  return !isConstLoad && strstr(opName, "LOAD") != NULL;
 }
 // Always returns false; MI is not inspected.
 bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const
 {
  return false;
 }
 // True when the opcode's name contains "EXTLOAD". This is a substring match,
 // so names containing SEXTLOAD / ZEXTLOAD / AEXTLOAD match as well.
 bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "EXTLOAD") != NULL;
 }
 // True when the opcode's name contains "SEXTLOAD".
 bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "SEXTLOAD") != NULL;
 }
 // True when the opcode's name contains "AEXTLOAD".
 bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "AEXTLOAD") != NULL;
 }
 // True when the opcode's name contains "ZEXTLOAD".
 bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "ZEXTLOAD") != NULL;
 }
 // True when the opcode's name contains "STORE". This is a substring match,
 // so names containing TRUNCSTORE match as well.
 bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "STORE") != NULL;
 }
 // True when the opcode's name contains "TRUNCSTORE".
 bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "TRUNCSTORE") != NULL;
 }
 // True when the opcode's name contains "ATOM".
 bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "ATOM") != NULL;
 }
 // True when at least one of MI's memory operands is marked volatile; false
 // when there is none, including when MI has no memory operands at all.
 bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const {
  MachineInstr::mmo_iterator cur = MI->memoperands_begin();
  const MachineInstr::mmo_iterator last = MI->memoperands_end();
  // With no memory operands the range is empty and the loop is skipped.
  while (cur != last) {
    if ((*cur)->isVolatile()) {
      return true;
    }
    ++cur;
  }
  return false;
 }
 // True when the opcode's name contains "GLOBAL".
 bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "GLOBAL") != NULL;
 }
 // True when the opcode's name contains "PRIVATE".
 bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "PRIVATE") != NULL;
 }
 // True when the opcode's name contains "CONSTANT" or "CPOOL".
 bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const
 {
  if (strstr(getName(MI->getOpcode()), "CONSTANT") != NULL) {
    return true;
  }
  return strstr(getName(MI->getOpcode()), "CPOOL") != NULL;
 }
 // True when the opcode's name contains "REGION".
 bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "REGION") != NULL;
 }
 // True when the opcode's name contains "LOCAL".
 bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "LOCAL") != NULL;
 }
 // True when the opcode's name contains "IMAGE".
 bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "IMAGE") != NULL;
 }
 // True when the opcode's name contains "APPEND".
 bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "APPEND") != NULL;
 }
 // True when the opcode's name contains "ATOM_R".
 bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "ATOM_R") != NULL;
 }
 // True when the opcode's name contains "ATOM_L".
 bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "ATOM_L") != NULL;
 }
 // True when the opcode's name contains "ATOM_G", or when isArenaAtomic(MI)
 // holds.
 bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const
 {
  if (strstr(getName(MI->getOpcode()), "ATOM_G") != NULL) {
    return true;
  }
  return isArenaAtomic(MI);
 }
 // True when the opcode's name contains "ATOM_A".
 bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const
 {
  const char *opName = getName(MI->getOpcode());
  return strstr(opName, "ATOM_A") != NULL;
 }
--- a/lib/Target/AMDGPU/AMDILInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDILInstrInfo.h
@ -0,0 +1,160 @@
 //===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file contains the AMDIL implementation of the TargetInstrInfo class.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDILINSTRUCTIONINFO_H_
 #define AMDILINSTRUCTIONINFO_H_
 #include "AMDILRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #define GET_INSTRINFO_HEADER
 #include "AMDGPUGenInstrInfo.inc"
 namespace llvm {
  // AMDIL - This namespace holds all of the target specific flags that
  // instruction info tracks.
  //
  //class AMDILTargetMachine;
 // Instruction information for the AMDIL target, layered on the
 // TableGen-generated AMDILGenInstrInfo. The class is abstract: copyPhysReg,
 // getMovImmInstr, getIEQOpcode and isMov are pure virtual and must be
 // supplied by a derived class.
 class AMDILInstrInfo : public AMDILGenInstrInfo {
 private:
  // Register info owned by this object and exposed through getRegisterInfo().
  const AMDILRegisterInfo RI;
  // Advances iter to the next branch instruction in MBB; returns whether one
  // was found.
  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                          MachineBasicBlock &MBB) const;
  // Returns the conditional branch opcode to use for the condition operand op.
  unsigned int getBranchInstr(const MachineOperand &op) const;
 public:
  explicit AMDILInstrInfo(TargetMachine &tm);
  // getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
  // such, whenever a client has an instance of instruction info, it should
  // always be able to get register info as well (through this method).
  const AMDILRegisterInfo &getRegisterInfo() const;
  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                             unsigned &DstReg, unsigned &SubIdx) const;
  // Stack-slot load/store recognition queries.
  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                     int &FrameIndex) const;
  bool hasLoadFromStackSlot(const MachineInstr *MI,
                            const MachineMemOperand *&MMO,
                            int &FrameIndex) const;
  unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
  unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                      int &FrameIndex) const;
  bool hasStoreFromStackSlot(const MachineInstr *MI,
                             const MachineMemOperand *&MMO,
                             int &FrameIndex) const;
  MachineInstr *
  convertToThreeAddress(MachineFunction::iterator &MFI,
                        MachineBasicBlock::iterator &MBBI,
                        LiveVariables *LV) const;
  // Branch analysis, removal and insertion.
  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify) const;
  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
  unsigned
  InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
               MachineBasicBlock *FBB,
               const SmallVectorImpl<MachineOperand> &Cond,
               DebugLoc DL) const;
  // Physical register copy; pure virtual, provided by a derived class.
  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const = 0;
  // Spill and reload of a register to/from the stack slot FrameIndex.
  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
                           unsigned SrcReg, bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const;
  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            unsigned DestReg, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const;
 protected:
  // Memory-operand folding: one overload takes a frame index, the other the
  // load instruction to fold.
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      int FrameIndex) const;
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      MachineInstr *LoadMI) const;
 public:
  bool canFoldMemoryOperand(const MachineInstr *MI,
                            const SmallVectorImpl<unsigned> &Ops) const;
  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                           SmallVectorImpl<MachineInstr *> &NewMIs) const;
  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                           SmallVectorImpl<SDNode *> &NewNodes) const;
  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex = 0) const;
  // Scheduling heuristic for placing two loads next to each other.
  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                               int64_t Offset1, int64_t Offset2,
                               unsigned NumLoads) const;
  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const;
  // Predication queries.
  bool isPredicated(const MachineInstr *MI) const;
  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                         const SmallVectorImpl<MachineOperand> &Pred2) const;
  bool DefinesPredicate(MachineInstr *MI,
                        std::vector<MachineOperand> &Pred) const;
  bool isPredicable(MachineInstr *MI) const;
  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
  // Helper functions that check the opcode for status information
  bool isLoadInst(llvm::MachineInstr *MI) const;
  bool isExtLoadInst(llvm::MachineInstr *MI) const;
  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
  bool isStoreInst(llvm::MachineInstr *MI) const;
  bool isTruncStoreInst(llvm::MachineInstr *MI) const;
  bool isAtomicInst(llvm::MachineInstr *MI) const;
  bool isVolatileInst(llvm::MachineInstr *MI) const;
  bool isGlobalInst(llvm::MachineInstr *MI) const;
  bool isPrivateInst(llvm::MachineInstr *MI) const;
  bool isConstantInst(llvm::MachineInstr *MI) const;
  bool isRegionInst(llvm::MachineInstr *MI) const;
  bool isLocalInst(llvm::MachineInstr *MI) const;
  bool isImageInst(llvm::MachineInstr *MI) const;
  bool isAppendInst(llvm::MachineInstr *MI) const;
  bool isRegionAtomic(llvm::MachineInstr *MI) const;
  bool isLocalAtomic(llvm::MachineInstr *MI) const;
  bool isGlobalAtomic(llvm::MachineInstr *MI) const;
  bool isArenaAtomic(llvm::MachineInstr *MI) const;
  // Pure virtual hooks provided by a derived class.
  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                        int64_t Imm) const = 0;
  virtual unsigned getIEQOpcode() const = 0;
  virtual bool isMov(unsigned Opcode) const = 0;
 };
 }
 #endif // AMDILINSTRUCTIONINFO_H_
--- a/lib/Target/AMDGPU/AMDILInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDILInstrInfo.td
@ -0,0 +1,108 @@
 //===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file describes the AMDIL instructions in TableGen format.
 //
 //===----------------------------------------------------------------------===//
 // AMDIL Instruction Predicate Definitions
 // Predicate for hardware double precision divide: true when the device
 // generation is newer than HD4XXX and the device uses hardware DoubleOps.
 def HasHWDDiv                 : Predicate<"Subtarget.device()"
                           "->getGeneration() > AMDILDeviceInfo::HD4XXX && "
              "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
 // Predicate for hardware doubles without a hardware double precision divide:
 // true when the device generation is exactly HD4XXX and the device uses
 // hardware DoubleOps.
 def HasSWDDiv             : Predicate<"Subtarget.device()"
                           "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
              "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
 // Predicate that is true if the hardware supports 24-bit signed math ops
 // (device generation newer than HD5XXX). Otherwise a software expansion to
 // 32-bit math ops is used instead.
 def HasHWSign24Bit          : Predicate<"Subtarget.device()"
                            "->getGeneration() > AMDILDeviceInfo::HD5XXX">;
 // Predicates for 64-bit (LongOps) operations: HasHW64Bit is true when the
 // device uses hardware for them, HasSW64Bit when it uses software.
 def HasHW64Bit              : Predicate<"Subtarget.device()"
                            "->usesHardware(AMDILDeviceInfo::LongOps)">;
 def HasSW64Bit              : Predicate<"Subtarget.device()"
                            "->usesSoftware(AMDILDeviceInfo::LongOps)">;
 // Predicate that is true if the timer register (TmrReg) is supported by the
 // device.
 def HasTmrRegister          : Predicate<"Subtarget.device()"
                            "->isSupported(AMDILDeviceInfo::TmrReg)">;
 // Predicate that is true if the device generation is at least HD5XXX (the
 // evergreen series, per the original comment).
 def HasDeviceIDInst         : Predicate<"Subtarget.device()"
                            "->getGeneration() >= AMDILDeviceInfo::HD5XXX">;
 // Predicate that is true if the device uses hardware for the region address
 // space (RegionMem).
 def hasRegionAS             : Predicate<"Subtarget.device()"
                            "->usesHardware(AMDILDeviceInfo::RegionMem)">;
 // Predicate that is true if the device does not support the region address
 // space (RegionMem), and false if it does.
 def noRegionAS             : Predicate<"!Subtarget.device()"
                            "->isSupported(AMDILDeviceInfo::RegionMem)">;
 // Predicates for 64-bit multiply support in the IL. HasHW64Mul is true when
 // the CAL version is at least CAL_VERSION_SC_139 and the device generation
 // is at least HD5XXX; HasSW64Mul is true when the CAL version is older than
 // CAL_VERSION_SC_139.
 // NOTE(review): neither predicate holds for a CAL version >= SC_139 on a
 // generation older than HD5XXX -- confirm this gap is intended.
 def HasHW64Mul              : Predicate<"Subtarget.calVersion()" 
                                          ">= CAL_VERSION_SC_139"
                                          "&& Subtarget.device()"
                                          "->getGeneration() >="
                                          "AMDILDeviceInfo::HD5XXX">;
 def HasSW64Mul              : Predicate<"Subtarget.calVersion()" 
                                          "< CAL_VERSION_SC_139">;
 // Predicates for 64-bit div/mod support in the IL: HasHW64DivMod is true
 // when the device uses hardware for HW64BitDivMod, HasSW64DivMod when it
 // uses software.
 def HasHW64DivMod           : Predicate<"Subtarget.device()"
                            "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
 def HasSW64DivMod           : Predicate<"Subtarget.device()"
                            "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;
 // Pointer-width predicates: Has64BitPtr is true when the subtarget reports
 // is64bit(), Has32BitPtr when it does not.
 def Has64BitPtr             : Predicate<"Subtarget.is64bit()">;
 def Has32BitPtr             : Predicate<"!Subtarget.is64bit()">;
 //===--------------------------------------------------------------------===//
 // Custom Operands
 //===--------------------------------------------------------------------===//
 include "AMDILOperands.td"
 //===--------------------------------------------------------------------===//
 // Custom Selection DAG Type Profiles
 //===--------------------------------------------------------------------===//
 include "AMDILProfiles.td"
 //===--------------------------------------------------------------------===//
 // Custom Selection DAG Nodes
 //===--------------------------------------------------------------------===//
 include "AMDILNodes.td"
 //===--------------------------------------------------------------------===//
 // Custom Pattern DAG Nodes
 //===--------------------------------------------------------------------===//
 include "AMDILPatterns.td"
 //===----------------------------------------------------------------------===//
 // Instruction format classes
 //===----------------------------------------------------------------------===//
 include "AMDILFormats.td"
 //===--------------------------------------------------------------------===//
 // Multiclass Instruction formats
 //===--------------------------------------------------------------------===//
 include "AMDILMultiClass.td"
 //===--------------------------------------------------------------------===//
 // Intrinsics support
 //===--------------------------------------------------------------------===//
 include "AMDILIntrinsics.td"
 //===--------------------------------------------------------------------===//
 // Instructions support
 //===--------------------------------------------------------------------===//
 include "AMDILInstructions.td"
--- a/lib/Target/AMDGPU/AMDILInstructions.td
+++ b/lib/Target/AMDGPU/AMDILInstructions.td
@ -0,0 +1,143 @@
 //===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 // Pseudo load/store instructions, one multiclass instantiation per address
 // space (global, local, private, region, constant, constant pool) and
 // extension kind (plain, zext, sext, aext). Everything in this block is only
 // available under the Has32BitPtr predicate and is marked isCodeGenOnly.
 // The string argument is the text handed to the multiclass; the second
 // argument (where present) is the pattern fragment the instruction matches.
 let Predicates = [Has32BitPtr] in {
  let isCodeGenOnly=1 in {
    //===----------------------------------------------------------------------===//
    // Store Memory Operations
    //===----------------------------------------------------------------------===//
    // Truncating stores use dedicated multiclasses (GTRUNCSTORE, LTRUNCSTORE,
    // PTRUNCSTORE, RTRUNCSTORE) that take no pattern fragment; plain stores
    // use the generic STORE multiclass with an explicit fragment.
    defm GLOBALTRUNCSTORE   : GTRUNCSTORE<"!global trunc store">;
    defm LOCALTRUNCSTORE    : LTRUNCSTORE<"!local trunc store">;
    defm LOCALSTORE         : STORE<"!local store"          , local_store>;
    defm PRIVATETRUNCSTORE  : PTRUNCSTORE<"!private trunc store">;
    defm PRIVATESTORE       : STORE<"!private store"        , private_store>;
    defm REGIONTRUNCSTORE   : RTRUNCSTORE<"!region trunc store">;
    defm REGIONSTORE        : STORE<"!region hw store"      , region_store>;
    //===---------------------------------------------------------------------===//
    // Load Memory Operations
    //===---------------------------------------------------------------------===//
    // All loads share the LOAD multiclass and differ only in text and
    // pattern fragment.
    defm GLOBALZEXTLOAD     : LOAD<"!global zext load"       , global_zext_load>;
    defm GLOBALSEXTLOAD     : LOAD<"!global sext load"       , global_sext_load>;
    defm GLOBALAEXTLOAD     : LOAD<"!global aext load"       , global_aext_load>;
    defm PRIVATELOAD        : LOAD<"!private load"           , private_load>;
    defm PRIVATEZEXTLOAD    : LOAD<"!private zext load"      , private_zext_load>;
    defm PRIVATESEXTLOAD    : LOAD<"!private sext load"      , private_sext_load>;
    defm PRIVATEAEXTLOAD    : LOAD<"!private aext load"      , private_aext_load>;
    defm CPOOLLOAD          : LOAD<"!constant pool load"     , cp_load>;
    defm CPOOLZEXTLOAD      : LOAD<"!constant pool zext load", cp_zext_load>;
    defm CPOOLSEXTLOAD      : LOAD<"!constant pool sext load", cp_sext_load>;
    // NOTE(review): the text below reads "constant aext pool" whereas the
    // sibling entries read "constant pool <ext>" -- confirm whether the word
    // order is intentional before changing the string.
    defm CPOOLAEXTLOAD      : LOAD<"!constant aext pool load", cp_aext_load>;
    defm CONSTANTLOAD       : LOAD<"!constant load"          , constant_load>;
    defm CONSTANTZEXTLOAD   : LOAD<"!constant zext load"     , constant_zext_load>;
    defm CONSTANTSEXTLOAD   : LOAD<"!constant sext load"     , constant_sext_load>;
    defm CONSTANTAEXTLOAD   : LOAD<"!constant aext load"     , constant_aext_load>;
    defm LOCALLOAD          : LOAD<"!local load"             , local_load>;
    defm LOCALZEXTLOAD      : LOAD<"!local zext load"        , local_zext_load>;
    defm LOCALSEXTLOAD      : LOAD<"!local sext load"        , local_sext_load>;
    defm LOCALAEXTLOAD      : LOAD<"!local aext load"        , local_aext_load>;
    defm REGIONLOAD         : LOAD<"!region load"            , region_load>;
    defm REGIONZEXTLOAD     : LOAD<"!region zext load"       , region_zext_load>;
    defm REGIONSEXTLOAD     : LOAD<"!region sext load"       , region_sext_load>;
    defm REGIONAEXTLOAD     : LOAD<"!region aext load"       , region_aext_load>;
  }
 }
 //===---------------------------------------------------------------------===//
 // Custom Inserter for Branches and returns, this eventually will be a
 // separate pass
 //===---------------------------------------------------------------------===//
 // Branch pseudo instructions; both are flagged as block terminators.
 let isTerminator = 1 in {
  // Unconditional branch: an IL_PSEUDO_INST with no outputs that takes the
  // target basic block as its only operand and is selected for `br`.
  def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
      "; Pseudo unconditional branch instruction",
      [(br bb:$target)]>;
  // Conditional branches, instantiated from the BranchConditional multiclass
  // with the IL_brcond node.
  defm BRANCH_COND : BranchConditional<IL_brcond>;
 }
 //===---------------------------------------------------------------------===//
 // return instructions
 //===---------------------------------------------------------------------===//
 // RETURN is selected for the IL_retflag node. It is flagged as a terminator,
 // a return and a barrier with a control dependency, has no outputs, and
 // accepts a variable number of input operands (variable_ops).
 let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
  def RETURN          : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
      IL_OP_RET.Text, [(IL_retflag)]>;
 }
 //===---------------------------------------------------------------------===//
 // Handle a function call
 //===---------------------------------------------------------------------===//
 // CALL takes the call target as its single operand and has no selection
 // pattern. It is flagged as a call, lists R1-R10 in Defs and R11-R20 in
 // Uses, and prints as the IL_OP_CALL text followed by the target.
 let isCall = 1,
     Defs = [R1, R2, R3, R4, R5, R6, R7, R8, R9, R10],
     Uses = [R11, R12, R13, R14, R15, R16, R17, R18, R19, R20] in {
   def CALL : UnaryOpNoRet<IL_OP_CALL, (outs), (ins calltarget:$dst),
                           !strconcat(IL_OP_CALL.Text, " $dst"), []>;
 }
 //===---------------------------------------------------------------------===//
 // Flow and Program control Instructions
 //===---------------------------------------------------------------------===//
 // Structured flow-control and program-control instructions. All of them are
 // flagged as terminators, produce no outputs and carry no selection pattern
 // (the pattern list is empty); their text comes from the opcode's .Text
 // field.
 let isTerminator=1 in {
  // SWITCH and CASE are the only plain defs here that take an operand
  // (a GPRI32 register appended to the opcode text).
  def SWITCH      : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
  !strconcat(IL_OP_SWITCH.Text, " $src"), []>;
  def CASE        : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
      !strconcat(IL_OP_CASE.Text, " $src"), []>;
  // Operand-less control instructions.
  def BREAK       : ILFormat<IL_OP_BREAK, (outs), (ins),
      IL_OP_BREAK.Text, []>;
  def CONTINUE    : ILFormat<IL_OP_CONTINUE, (outs), (ins),
      IL_OP_CONTINUE.Text, []>;
  def DEFAULT     : ILFormat<IL_OP_DEFAULT, (outs), (ins),
      IL_OP_DEFAULT.Text, []>;
  def ELSE        : ILFormat<IL_OP_ELSE, (outs), (ins),
      IL_OP_ELSE.Text, []>;
  def ENDSWITCH   : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
      IL_OP_ENDSWITCH.Text, []>;
  def ENDMAIN     : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
      IL_OP_ENDMAIN.Text, []>;
  def END         : ILFormat<IL_OP_END, (outs), (ins),
      IL_OP_END.Text, []>;
  def ENDFUNC     : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
      IL_OP_ENDFUNC.Text, []>;
  def ENDIF       : ILFormat<IL_OP_ENDIF, (outs), (ins),
      IL_OP_ENDIF.Text, []>;
  // Note: the record is named WHILELOOP but uses the IL_OP_WHILE opcode.
  def WHILELOOP   : ILFormat<IL_OP_WHILE, (outs), (ins),
      IL_OP_WHILE.Text, []>;
  def ENDLOOP     : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
      IL_OP_ENDLOOP.Text, []>;
  def FUNC        : ILFormat<IL_OP_FUNC, (outs), (ins),
      IL_OP_FUNC.Text, []>;
  def RETDYN      : ILFormat<IL_OP_RET_DYN, (outs), (ins),
      IL_OP_RET_DYN.Text, []>;
  // Conditional forms instantiated from the BranchInstr multiclass.
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ  : BranchInstr<IL_OP_IF_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ   : BranchInstr<IL_OP_IF_LOGICALZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
  // Compare-style conditional forms instantiated from BranchInstr2.
  defm IFC         : BranchInstr2<IL_OP_IFC>;
  defm BREAKC      : BranchInstr2<IL_OP_BREAKC>;
  defm CONTINUEC   : BranchInstr2<IL_OP_CONTINUEC>;
 }
 // TRAP is selected for the generic `trap` node. It reuses the IL_OP_NOP
 // opcode and text, and is flagged as a terminator and barrier with a control
 // dependency.
 let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
  def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
      IL_OP_NOP.Text, [(trap)]>;
 }
--- a/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
+++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
@ -0,0 +1,93 @@
 //===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file contains the AMDIL Implementation of the IntrinsicInfo class.
 //
 //===-----------------------------------------------------------------------===//
 #include "AMDILIntrinsicInfo.h"
 #include "AMDIL.h"
 #include "AMDILSubtarget.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
 using namespace llvm;
 #define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
 /// Construct the AMDIL intrinsic-info object. The target machine pointer is
 /// accepted but not stored or otherwise used by this constructor.
 AMDILIntrinsicInfo::AMDILIntrinsicInfo(TargetMachine *tm)
     : TargetIntrinsicInfo() {}
 /// Return the name of the AMDIL-specific intrinsic \p IntrID.
 ///
 /// The name table is generated by TableGen and is indexed relative to
 /// Intrinsic::num_intrinsics, i.e. its first entry belongs to the first
 /// target-specific intrinsic.
 ///
 /// \param IntrID  Intrinsic ID to look up.
 /// \param Tys     Unused.
 /// \param numTys  Unused.
 /// \returns The intrinsic name, or an empty string when \p IntrID is below
 ///          Intrinsic::num_intrinsics and therefore has no entry in the
 ///          AMDIL name table.
 std::string
 AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
                             unsigned int numTys) const
 {
   static const char* const names[] = {
 #define GET_INTRINSIC_NAME_TABLE
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_INTRINSIC_NAME_TABLE
   };
   //assert(!isOverloaded(IntrID)
   //&& "AMDIL Intrinsics are not overloaded");
   if (IntrID < Intrinsic::num_intrinsics) {
     // Not an AMDIL intrinsic. Returning the literal 0 here would build a
     // std::string from a null 'const char *', which is undefined behaviour;
     // hand back an empty string instead.
     return std::string();
   }
   assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics
          && "Invalid intrinsic ID");
   // Rebase the ID so that the first AMDIL intrinsic maps to table slot 0.
   return std::string(names[IntrID - Intrinsic::num_intrinsics]);
 }
 /// Map a function name to an AMDIL intrinsic ID.
 ///
 /// The TableGen-generated recognizer is expanded first (presumably it matches
 /// \p Name / \p Len against the known intrinsic names and returns on a hit --
 /// confirm against the generated .inc). If control reaches the code after
 /// it, \p Name is looked up as an "AMDIL" GCC builtin.
 ///
 /// \returns The intrinsic ID, or 0 when the builtin lookup finds nothing.
 unsigned int
 AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
 {
 #define GET_FUNCTION_RECOGNIZER
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_FUNCTION_RECOGNIZER
   const AMDGPUIntrinsic::ID NoMatch
     = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
   const AMDGPUIntrinsic::ID BuiltinID
     = getIntrinsicForGCCBuiltin("AMDIL", Name);
   if (BuiltinID == NoMatch) {
     return 0;
   }
   return BuiltinID;
 }
 /// Report whether the intrinsic with the given ID is overloaded.
 ///
 /// The function has no hand-written statements: its entire body is the
 /// TableGen-generated overload table section of AMDGPUGenIntrinsics.inc.
 /// NOTE(review): the generated code presumably supplies the return statement
 /// and refers to the parameter by the name `id` -- confirm before renaming it.
 bool 
 AMDILIntrinsicInfo::isOverloaded(unsigned id) const 
 {
  // Overload Table
 #define GET_INTRINSIC_OVERLOAD_TABLE
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_INTRINSIC_OVERLOAD_TABLE
 }
 /// This defines the "getAttributes(ID id)" method.
 #define GET_INTRINSIC_ATTRIBUTES
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_INTRINSIC_ATTRIBUTES
 /// Placeholder for creating a declaration of an AMDIL intrinsic in \p M.
 ///
 /// This is not implemented: builds with assertions enabled abort on the
 /// assert below, and builds without assertions return a null pointer.
 ///
 /// \param M       Unused.
 /// \param IntrID  Intrinsic ID; only used to call getAttributes().
 /// \param Tys     Unused.
 /// \param numTys  Unused.
 /// \returns Always null.
 Function*
 AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
                                    Type **Tys,
                                    unsigned numTys) const
 {
   //Silence a warning
   AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
   (void)List;
   assert(!"Not implemented");
   // When assertions are compiled out the assert above disappears; without an
   // explicit return the function would flow off the end of a non-void
   // function, which is undefined behaviour.
   return 0;
 }
--- a/lib/Target/AMDGPU/AMDILIntrinsicInfo.h
+++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.h
@ -0,0 +1,47 @@
 //===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 //   Interface for the AMDIL Implementation of the Intrinsic Info class.
 //
 //===-----------------------------------------------------------------------===//
 #ifndef _AMDIL_INTRINSICS_H_
 #define _AMDIL_INTRINSICS_H_
 #include "llvm/Intrinsics.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 namespace llvm {
  class TargetMachine;
  // IDs of the AMDIL-specific intrinsics.
  namespace AMDGPUIntrinsic {
    enum ID {
      // Anchor enumerator: it is set to the last target-independent intrinsic
      // ID so that the first generated enumerator below takes the value
      // Intrinsic::num_intrinsics.
      last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
 #define GET_INTRINSIC_ENUM_VALUES
 #include "AMDGPUGenIntrinsics.inc"
 #undef GET_INTRINSIC_ENUM_VALUES
      // One past the last generated enumerator.
      , num_AMDIL_intrinsics
    };
  }
  // AMDIL implementation of the TargetIntrinsicInfo interface.
  class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
    public:
      AMDILIntrinsicInfo(TargetMachine *tm);
      // Returns the name of the intrinsic with the given ID.
      std::string getName(unsigned int IntrId, Type **Tys = 0,
          unsigned int numTys = 0) const;
      // Maps a function name (and its length) to an intrinsic ID.
      unsigned int lookupName(const char *Name, unsigned int Len) const;
      // Reports whether the intrinsic with the given ID is overloaded.
      bool isOverloaded(unsigned int IID) const;
      // Returns a declaration of the intrinsic with the given ID in module M.
      Function *getDeclaration(Module *M, unsigned int ID,
          Type **Tys = 0,
          unsigned int numTys = 0) const;
  }; // AMDILIntrinsicInfo
 }
 #endif // _AMDIL_INTRINSICS_H_
--- a/lib/Target/AMDGPU/AMDILIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDILIntrinsics.td
@ -0,0 +1,705 @@
 //===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file defines all of the amdil-specific intrinsics
 //
 //===---------------------------------------------------------------===//
 let TargetPrefix = "AMDIL", isTarget = 1 in {
 //------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
  // Memory-fence intrinsics. Each record binds a builtin name to an intrinsic
  // and inherits its signature from UnaryIntNoRetInt (defined elsewhere).
  // The name suffix selects the fenced scope (none, global, local, region);
  // the read_only/write_only groups bind the read_mem_fence*/write_mem_fence*
  // builtins.
  def int_AMDIL_fence   : GCCBuiltin<"mem_fence">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_global   : GCCBuiltin<"mem_fence_global">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_local   : GCCBuiltin<"mem_fence_local">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_region   : GCCBuiltin<"mem_fence_region">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_read_only   : GCCBuiltin<"read_mem_fence">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_read_only_global   : GCCBuiltin<"read_mem_fence_global">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_read_only_local   : GCCBuiltin<"read_mem_fence_local">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_read_only_region : GCCBuiltin<"read_mem_fence_region">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_write_only   : GCCBuiltin<"write_mem_fence">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_write_only_global   : GCCBuiltin<"write_mem_fence_global">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_write_only_local   : GCCBuiltin<"write_mem_fence_local">,
        UnaryIntNoRetInt;
  def int_AMDIL_fence_write_only_region : GCCBuiltin<"write_mem_fence_region">,
        UnaryIntNoRetInt;
  // Early exit shares the fence signature class.
  def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
        UnaryIntNoRetInt;
  // Select, absolute value, bit-manipulation, multiply/mad, carry/borrow,
  // min/max and media intrinsics. Each record binds an __amdil_* builtin name
  // and takes its signature from a shared class defined elsewhere; the class
  // name gives the operand count (Unary/Binary/Ternary/Quaternary) and the
  // Int/Float suffix distinguishes the integer and floating-point variants.
  def int_AMDIL_cmov_logical  : GCCBuiltin<"__amdil_cmov_logical">,
          TernaryIntInt;
  def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
  def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
  // Bit-field extract, reverse, count and find-first-bit.
  def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
          TernaryIntInt;
  def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
          TernaryIntInt;
  def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
          UnaryIntInt;
  def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
          UnaryIntInt;
  def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
          UnaryIntInt;
  def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
          UnaryIntInt;
  def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
          UnaryIntInt;
  // Bit/byte align and bit-field insert/mask.
  def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
                    TernaryIntInt;
  def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
                    TernaryIntInt;
  def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
                    QuaternaryIntInt;
  def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
      TernaryIntInt;
  def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
      BinaryIntInt;
  // Multiply-add, high multiply and 24-bit multiply variants.
  def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
          TernaryIntInt;
  def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
          TernaryIntInt;
  def int_AMDIL_mad     : GCCBuiltin<"__amdil_mad">,
          TernaryIntFloat;
  def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
          BinaryIntInt;
  def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
          BinaryIntInt;
  def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
          BinaryIntInt;
  def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
          BinaryIntInt;
  def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
          BinaryIntInt;
  def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
          BinaryIntInt;
  def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
          TernaryIntInt;
  def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
          TernaryIntInt;
  // Carry / borrow.
  def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
          BinaryIntInt;
  def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
          BinaryIntInt;
  // Minimum / maximum in signed, unsigned and floating-point flavours.
  def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
          BinaryIntInt;
  def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
          BinaryIntInt;
  def int_AMDIL_min     : GCCBuiltin<"__amdil_min">,
          BinaryIntFloat;
  def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
          BinaryIntInt;
  def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
          BinaryIntInt;
  def int_AMDIL_max     : GCCBuiltin<"__amdil_max">,
          BinaryIntFloat;
  // Media operations (lerp on packed bytes, sum of absolute differences).
  def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
          TernaryIntInt;
  def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
          TernaryIntInt;
  def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
          TernaryIntInt;
  // Floating-point math intrinsics. All of them use the *IntFloat signature
  // classes (defined elsewhere), except int_AMDIL_udiv which is an integer
  // operation using BinaryIntInt. Several operations come in a plain and a
  // "_vec" flavour bound to distinct builtin names.
  def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
          UnaryIntFloat;
  def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
          TernaryIntFloat;
  def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
          UnaryIntFloat;
  // Rounding modes.
  def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
          UnaryIntFloat;
  def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
          UnaryIntFloat;
  def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
          UnaryIntFloat;
  def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
          UnaryIntFloat;
  // Trigonometric functions.
  def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
          UnaryIntFloat;
  def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
          UnaryIntFloat;
  def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
          UnaryIntFloat;
  def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
          UnaryIntFloat;
  def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
          UnaryIntFloat;
  def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
          UnaryIntFloat;
  def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
          UnaryIntFloat;
  def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
          UnaryIntFloat;
  // Power, division and square root.
  def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
  def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
  def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
  def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
          UnaryIntFloat;
  def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
          UnaryIntFloat;
  // Exponential and logarithm.
  def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
          UnaryIntFloat;
  def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
          UnaryIntFloat;
  def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
          UnaryIntFloat;
  def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
          UnaryIntFloat;
  def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
          UnaryIntFloat;
  def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
          UnaryIntFloat;
  // Sign, fused multiply-add, reciprocal square root, length and lerp.
  def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
          UnaryIntFloat;
  def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
          TernaryIntFloat;
  def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
          UnaryIntFloat;
  def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
          UnaryIntFloat;
  def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
          UnaryIntFloat;
  def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
          TernaryIntFloat;
  // Intrinsics whose signatures are spelled out explicitly as
  // Intrinsic<[results], [operands], [properties]>, plus the conversion and
  // unpack intrinsics that use the ConvertInt* classes (defined elsewhere).
  // sad4: (v4i32, v4i32, i32) -> i32.
  def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
      Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
           llvm_v4i32_ty, llvm_i32_ty], []>;
  // frexp on double: double -> v2i64.
  def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
        Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
 // ldexp uses overloaded ("any") float and integer types.
 def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
    Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
  // Double-precision reciprocal: double -> double.
  def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
      Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
  // Half/float and float/int conversions with assorted rounding modes.
  def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
      ConvertIntITOF;
  def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
      ConvertIntFTOI;
 // Pack a v4f32 into a single i32.
 def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
  // Unpack byte 0..3; all four share the ConvertIntITOF class.
  def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
      ConvertIntITOF;
  // Dot products returning a scalar float. dp2/dp2_add take v2f32 operands;
  // dp3 and dp4 both take v4f32 operands.
  def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
          llvm_v2f32_ty, llvm_float_ty], []>;
  def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
          llvm_v2f32_ty], []>;
  def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
          llvm_v4f32_ty], []>;
  def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
          llvm_v4f32_ty], []>;
 //===---------------------- Image functions begin ------------------------===//
  // Image intrinsics for the 1d, 1d_array, 2d, 2d_array and 3d image kinds.
  // Every kind provides the same five records:
  //   write        (ptr, coordinate vector, v4i32) -> no result
  //   read_norm    (ptr, i32, v4f32) -> v4i32
  //   read_unnorm  (ptr, i32, v4f32) -> v4i32
  //   info0/info1  (ptr) -> v4i32, with an empty property list
  // The write and read records carry IntrReadWriteArgMem. The write
  // coordinate is v2i32 for every kind except 3d, which uses v4i32.
  def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_read_norm  : GCCBuiltin<"__amdil_image1d_read_norm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_read_unnorm  : GCCBuiltin<"__amdil_image1d_read_unnorm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
 // 1d array images.
 def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_array_read_norm  : GCCBuiltin<"__amdil_image1d_array_read_norm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_array_read_unnorm  : GCCBuiltin<"__amdil_image1d_array_read_unnorm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
 // 2d images.
 def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_read_norm  : GCCBuiltin<"__amdil_image2d_read_norm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_read_unnorm  : GCCBuiltin<"__amdil_image2d_read_unnorm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
 // 2d array images.
 def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_array_read_norm  : GCCBuiltin<"__amdil_image2d_array_read_norm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_array_read_unnorm  : GCCBuiltin<"__amdil_image2d_array_read_unnorm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  // 3d images; note the v4i32 write coordinate.
  def int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">,
         Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image3d_read_norm  : GCCBuiltin<"__amdil_image3d_read_norm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image3d_read_unnorm  : GCCBuiltin<"__amdil_image3d_read_unnorm">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
  def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
 //===---------------------- Image functions end --------------------------===//
  // Append-buffer intrinsics: (ptr) -> i32, marked IntrReadWriteArgMem.
  // NOTE(review): the "_noret" variants are declared with the same i32 result
  // as the returning variants -- confirm this is intended.
  def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_append_alloc_i32_noret : GCCBuiltin<"__amdil_append_alloc_noret">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
  def int_AMDIL_append_consume_i32_noret : GCCBuiltin<"__amdil_append_consume_noret">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
  // Work-item / work-group queries: no operands, v4i32 result.
  def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
      Intrinsic<[llvm_v4i32_ty], [], []>;
  // Scalar queries: no operands, i32 result. The two printf records omit the
  // property-list argument instead of passing an empty list.
  def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
      Intrinsic<[llvm_i32_ty], [], []>;
  def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
      Intrinsic<[llvm_i32_ty], []>;
  def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
      Intrinsic<[llvm_i32_ty], []>;
 /// Intrinsics for atomic instructions with no return value
 /// Signed 32 bit integer atomics for global address space
 // Naming scheme: int_AMDIL_atomic_<op>_<space><type>[_noret], where the
 // "gi32"/"gu32" suffixes used here denote the signed/unsigned global
 // variants. cmpxchg uses TernaryAtomicIntNoRet; every other operation uses
 // BinaryAtomicIntNoRet (both classes are defined elsewhere).
 def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
    BinaryAtomicIntNoRet;
 /// Unsigned 32 bit integer atomics for global address space
 def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">,
    BinaryAtomicIntNoRet;
 /// Intrinsics for atomic instructions with a return value
 /// Signed 32 bit integer atomics for global address space
 // Returning counterparts of the global "_noret" atomics: cmpxchg uses
 // TernaryAtomicInt and every other operation uses BinaryAtomicInt (both
 // classes are defined elsewhere).
 def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">,
    BinaryAtomicInt;
 /// 32 bit float atomics required by OpenCL
 // NOTE(review): the float exchange records reuse the integer atomic
 // signature classes -- confirm that is the intended typing.
 def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">,
    BinaryAtomicIntNoRet;
 /// Unsigned 32 bit integer atomics for global address space
 def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">,
    BinaryAtomicInt;
 /// Intrinsics for atomic instructions with no return value
 /// Signed 32 bit integer atomics for local address space
 // Suffix "li32_noret".  cmpxchg and mskor use TernaryAtomicIntNoRet; every
 // other operation uses BinaryAtomicIntNoRet.
 def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">,
    BinaryAtomicIntNoRet;
 /// Signed 32 bit integer atomics for region address space
 // No-return forms, suffix "ri32_noret".  cmpxchg and mskor use
 // TernaryAtomicIntNoRet; every other operation uses BinaryAtomicIntNoRet.
 def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_ri32_noret : GCCBuiltin<"__atomic_min_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">,
    BinaryAtomicIntNoRet;
 /// Unsigned 32 bit integer atomics for local address space
 // No-return forms, suffix "lu32_noret".  cmpxchg and mskor use
 // TernaryAtomicIntNoRet; every other operation uses BinaryAtomicIntNoRet.
 def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">,
    BinaryAtomicIntNoRet;
 /// Unsigned 32 bit integer atomics for region address space
 // No-return forms, suffix "ru32_noret".  cmpxchg and mskor use
 // TernaryAtomicIntNoRet; every other operation uses BinaryAtomicIntNoRet.
 def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">,
    TernaryAtomicIntNoRet;
 def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">,
    BinaryAtomicIntNoRet;
 /// Non-atomic intrinsics bound to the __amdil_* builtins below.
 // get_cycle_count uses the VoidIntLong signature class; the two *_id
 // intrinsics use VoidIntInt.
 // NOTE(review): these sit between the no-return and value-returning atomic
 // groups; going by their names they query hardware state rather than perform
 // atomics -- confirm.
 def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
    VoidIntLong;
 def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
    VoidIntInt;
 def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
    VoidIntInt;
 /// Intrinsics for atomic instructions with a return value
 /// Signed 32 bit integer atomics for local address space
 // Suffix "li32".  cmpxchg and mskor use TernaryAtomicInt; every other
 // operation uses BinaryAtomicInt.
 def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_li32 : GCCBuiltin<"__atomic_rsub_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">,
    BinaryAtomicInt;
 /// Signed 32 bit integer atomics for region address space
 // Suffix "ri32".  cmpxchg and mskor use TernaryAtomicInt; every other
 // operation uses BinaryAtomicInt.
 def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">,
    BinaryAtomicInt;
 /// 32 bit float atomics required by OpenCL
 // Exchange only, for the "lf32" and "rf32" suffixes, each in a plain and a
 // _noret form.
 // NOTE(review): all four records reuse the integer signature classes
 // (BinaryAtomicInt / BinaryAtomicIntNoRet); presumably the float payload is
 // passed as a 32-bit integer bit pattern -- confirm against those classes.
 def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">,
    BinaryAtomicIntNoRet;
 def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">,
    BinaryAtomicIntNoRet;
 /// Unsigned 32 bit integer atomics for local address space
 // Suffix "lu32".  cmpxchg and mskor use TernaryAtomicInt; every other
 // operation uses BinaryAtomicInt.
 def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">,
    BinaryAtomicInt;
 /// Unsigned 32 bit integer atomics for region address space
 // Suffix "ru32".  cmpxchg and mskor use TernaryAtomicInt; every other
 // operation uses BinaryAtomicInt.
 def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_rsub_ru32 : GCCBuiltin<"__atomic_rsub_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">,
    BinaryAtomicInt;
 def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">,
    TernaryAtomicInt;
 def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
    BinaryAtomicInt;
 /// Semaphore signal/wait/init
 // Signatures are spelled out inline as Intrinsic<[results], [operands]>:
 //   init   : no result, operands (pointer, i32)
 //   wait   : no result, operand  (pointer)
 //   signal : no result, operand  (pointer)
 //   size   : i32 result, no operands
 // Note that int_AMDIL_semaphore_size is bound to the builtin
 // __amdil_max_semaphore_size, so the record and builtin names differ.
 def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
 def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
    Intrinsic<[], [llvm_ptr_ty]>;
 def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
    Intrinsic<[], [llvm_ptr_ty]>;
 def int_AMDIL_semaphore_size   : GCCBuiltin<"__amdil_max_semaphore_size">,
    Intrinsic<[llvm_i32_ty], []>;
 }
--- a/lib/Target/AMDGPU/AMDILMultiClass.td
+++ b/lib/Target/AMDGPU/AMDILMultiClass.td
@ -0,0 +1,95 @@
 //===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 // Multiclass that handles branch instructions.
 // Instantiating it with an SDNode `Op` yields two IL_OP_IFC pseudo records,
 // <name>_i32 and <name>_f32, which differ only in the register class of the
 // condition operand (GPRI32 vs GPRF32).  Each takes a branch target plus the
 // condition, defines no outputs, and matches (Op bb:$target, $src0).
 multiclass BranchConditional<SDNode Op> {
    def _i32 : ILFormat<IL_OP_IFC, (outs),
  (ins brtarget:$target, GPRI32:$src0),
        "; i32 Pseudo branch instruction",
  [(Op bb:$target, GPRI32:$src0)]>;
    def _f32 : ILFormat<IL_OP_IFC, (outs),
  (ins brtarget:$target, GPRF32:$src0),
        "; f32 Pseudo branch instruction",
  [(Op bb:$target, GPRF32:$src0)]>;
 }
 // Multiclass that handles truncating stores matched by the global_*trunc_store
 // fragments.  It emits <name>_i32i8 and <name>_i32i16, matching
 // global_i8trunc_store and global_i16trunc_store respectively; both store a
 // GPRI32 value through a MEMI32 operand and print "<asm> $val $ptr".
 // NOTE(review): the records use the OneInOneOut format class although they
 // declare no outputs and two inputs -- confirm this is intended.
 multiclass GTRUNCSTORE<string asm> {
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(global_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(global_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
 }
 // Multiclass that handles truncating stores matched by the local_*trunc_store
 // fragments.  It emits <name>_i32i8 and <name>_i32i16, matching
 // local_i8trunc_store and local_i16trunc_store respectively; both store a
 // GPRI32 value through a MEMI32 operand and print "<asm> $val $ptr".
 // NOTE(review): the records use the OneInOneOut format class although they
 // declare no outputs and two inputs -- confirm this is intended.
 multiclass LTRUNCSTORE<string asm> {
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(local_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(local_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
 }
 // Multiclass that handles truncating stores matched by the
 // private_*trunc_store fragments.  It emits <name>_i32i8 and <name>_i32i16,
 // matching private_i8trunc_store and private_i16trunc_store respectively;
 // both store a GPRI32 value through a MEMI32 operand and print
 // "<asm> $val $ptr".
 // NOTE(review): the records use the OneInOneOut format class although they
 // declare no outputs and two inputs -- confirm this is intended.
 multiclass PTRUNCSTORE<string asm> {
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(private_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(private_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
 }
 // Multiclass that handles truncating stores matched by the region_*trunc_store
 // fragments.  It emits <name>_i32i8 and <name>_i32i16, matching
 // region_i8trunc_store and region_i16trunc_store respectively; both store a
 // GPRI32 value through a MEMI32 operand and print "<asm> $val $ptr".
 // NOTE(review): the records use the OneInOneOut format class although they
 // declare no outputs and two inputs -- confirm this is intended.
 multiclass RTRUNCSTORE<string asm> {
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(region_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(region_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
 }
 // Multiclass that handles memory store operations.
 // The store fragment is supplied by the caller as `OpNode`.  Two records are
 // emitted, <name>_i32 and <name>_f32, differing only in the register class of
 // the stored value (GPRI32 vs GPRF32); the address is always a MEMI32 operand
 // and the assembly string is "<asm> $val $ptr".
 multiclass STORE<string asm, PatFrag OpNode> {
  def _i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(OpNode GPRI32:$val, ADDR:$ptr)]>;
  def _f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEMI32:$ptr),
      !strconcat(asm, " $val $ptr"),
      [(OpNode GPRF32:$val, ADDR:$ptr)]>;
 }
 // Multiclass that handles load operations.
 // The load fragment is supplied by the caller as `OpNode`.  Two records are
 // emitted, <name>_i32 and <name>_f32, differing only in the register class of
 // the destination (GPRI32 vs GPRF32); the address is always a MEMI32 operand
 // and the assembly string is "<asm> $dst $ptr".
 multiclass LOAD<string asm, PatFrag OpNode> {
  def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEMI32:$ptr),
      !strconcat(asm, " $dst $ptr"),
      [(set GPRI32:$dst, (OpNode ADDR:$ptr))]>;
  def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEMI32:$ptr),
      !strconcat(asm, " $dst $ptr"),
      [(set GPRF32:$dst, (OpNode ADDR:$ptr))]>;
 }
 // Only scalar types should generate flow control.
 // Single-operand flow-control records for opcode `opc`: <name>_i32 and
 // <name>_f32 take one GPRI32 / GPRF32 source, define no outputs, print
 // "<opc.Text> $src", and carry an empty selection pattern ([]).
 multiclass BranchInstr<ILOpCode opc> {
  def _i32 : UnaryOpNoRet<opc, (outs), (ins GPRI32:$src),
      !strconcat(opc.Text, " $src"), []>;
  def _f32 : UnaryOpNoRet<opc, (outs), (ins GPRF32:$src),
      !strconcat(opc.Text, " $src"), []>;
 }
 // Only scalar types should generate flow control.
 // Two-operand flow-control records for opcode `opc`: <name>_i32 and
 // <name>_f32 take two sources of the same register class (GPRI32 or GPRF32),
 // define no outputs, print "<opc.Text> $src0, $src1", and carry an empty
 // selection pattern ([]).
 multiclass BranchInstr2<ILOpCode opc> {
  def _i32 : BinaryOpNoRet<opc, (outs), (ins GPRI32:$src0, GPRI32:$src1),
      !strconcat(opc.Text, " $src0, $src1"), []>;
  def _f32 : BinaryOpNoRet<opc, (outs), (ins GPRF32:$src0, GPRF32:$src1),
      !strconcat(opc.Text, " $src0, $src1"), []>;
 }
--- a/lib/Target/AMDGPU/AMDILNIDevice.cpp
+++ b/lib/Target/AMDGPU/AMDILNIDevice.cpp
@ -0,0 +1,71 @@
 //===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDILNIDevice.h"
 #include "AMDILEvergreenDevice.h"
 #include "AMDILSubtarget.h"
 using namespace llvm;
 /// Builds the Northern Islands device on top of the Evergreen base and records
 /// which chip the subtarget names in mDeviceFlag.  "caicos", "turks" and
 /// "cayman" map to their own flags; any other name selects OCL_DEVICE_BARTS.
 AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST)
 {
  const std::string deviceName = ST->getDeviceName();
  mDeviceFlag = (deviceName == "caicos") ? OCL_DEVICE_CAICOS
              : (deviceName == "turks")  ? OCL_DEVICE_TURKS
              : (deviceName == "cayman") ? OCL_DEVICE_CAYMAN
                                         : OCL_DEVICE_BARTS;
 }
 /// Nothing to release here; the body is intentionally empty.
 AMDILNIDevice::~AMDILNIDevice() {}
 /// Returns MAX_LDS_SIZE_900 when the device reports hardware support for
 /// AMDILDeviceInfo::LocalMem, and 0 otherwise.
 size_t
 AMDILNIDevice::getMaxLDSSize() const
 {
  if (!usesHardware(AMDILDeviceInfo::LocalMem))
    return 0;
  return MAX_LDS_SIZE_900;
 }
 /// Every device of this class reports the AMDILDeviceInfo::HD6XXX generation.
 uint32_t AMDILNIDevice::getGeneration() const
 {
  return AMDILDeviceInfo::HD6XXX;
 }
 /// Constructs the Cayman variant on top of AMDILNIDevice and then applies the
 /// Cayman capability bits.  setCaps() is virtual, but a virtual call made from
 /// a constructor resolves to the class being constructed, so this always runs
 /// AMDILCaymanDevice::setCaps().
 AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
  : AMDILNIDevice(ST)
 {
  this->setCaps();
 }
 /// Nothing to release here; the body is intentionally empty.
 AMDILCaymanDevice::~AMDILCaymanDevice() {}
 /// Applies the Cayman-specific capability bits:
 ///  - DoubleOps and FMA are marked as hardware features only when the
 ///    subtarget reports an override for DoubleOps;
 ///  - Signed24BitOps is always set in the hardware bits and cleared from the
 ///    software bits;
 ///  - ArenaSegment is always set in the software bits.
 void
 AMDILCaymanDevice::setCaps()
 {
  const bool doubleOverride = mSTM->isOverride(AMDILDeviceInfo::DoubleOps);
  if (doubleOverride) {
    mHWBits.set(AMDILDeviceInfo::DoubleOps);
    mHWBits.set(AMDILDeviceInfo::FMA);
  }

  // Signed 24-bit ops move from the software set to the hardware set.
  mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
  mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);

  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
 }
--- a/lib/Target/AMDGPU/AMDILNIDevice.h
+++ b/lib/Target/AMDGPU/AMDILNIDevice.h
@ -0,0 +1,59 @@
 //===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface for the subtarget data classes.
 //
 //===---------------------------------------------------------------------===//
 // This file will define the interface that each generation needs to
 // implement in order to correctly answer queries on the capabilities of the
 // specific hardware.
 //===---------------------------------------------------------------------===//
 #ifndef _AMDILNIDEVICE_H_
 #define _AMDILNIDEVICE_H_
 #include "AMDILEvergreenDevice.h"
 #include "AMDILSubtarget.h"
 namespace llvm {
  class AMDILSubtarget;
 //===---------------------------------------------------------------------===//
 // NI generation of devices and their respective sub classes
 //===---------------------------------------------------------------------===//
 // The AMDILNIDevice is the base class for all Northern Islands series
 // cards. It is very similar to the AMDILEvergreenDevice, with the major
 // exceptions being differences in wavefront size and hardware capabilities.
 // The NI devices all have 64-wide wavefronts and also add support for signed
 // 24-bit integer operations.
  /// Device description shared by the NI family; derives from the Evergreen
  /// device and overrides the LDS-size and generation queries.
  class AMDILNIDevice : public AMDILEvergreenDevice {
    public:
      /// Constructs the device from the subtarget that owns it.
      AMDILNIDevice(AMDILSubtarget *ST);
      virtual ~AMDILNIDevice();

      /// Maximum LDS size for this device.
      virtual size_t getMaxLDSSize() const;

      /// Hardware generation identifier for this device.
      virtual uint32_t getGeneration() const;
  }; // AMDILNIDevice
 // Just as the AMDILCypressDevice is the double capable version of the
 // AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of
 // the AMDILNIDevice.  The other major difference, though less useful from
 // this standpoint, is that the Cayman device has 4-wide ALUs, whereas the
 // rest of the NI family is 5-wide.
  /// Cayman flavour of the NI device; the only addition over AMDILNIDevice is
  /// a private setCaps() override.
  class AMDILCaymanDevice : public AMDILNIDevice {
    public:
      /// Constructs the device from the subtarget that owns it.
      AMDILCaymanDevice(AMDILSubtarget *ST);
      virtual ~AMDILCaymanDevice();

    private:
      /// Sets the capability bits specific to this device.
      virtual void setCaps();
  }; // AMDILCaymanDevice
  // LDS size limit used for the NI devices; defined as an alias of the
  // AMDILDevice::MAX_LDS_SIZE_800 value, so both limits are identical.
  // Being a namespace-scope `static const` in a header, each translation unit
  // that includes this file gets its own copy.
  static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
 } // namespace llvm
 #endif // _AMDILNIDEVICE_H_
--- a/lib/Target/AMDGPU/AMDILNodes.td
+++ b/lib/Target/AMDGPU/AMDILNodes.td
@ -0,0 +1,47 @@
 //===- AMDILNodes.td - AMD IL nodes ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // Flow Control DAG Nodes
 //===----------------------------------------------------------------------===//
 // Conditional branch node: binds AMDILISD::BRANCH_COND with the SDTIL_BRCond
 // type profile; the node carries a chain (SDNPHasChain).
 def IL_brcond      : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
 //===----------------------------------------------------------------------===//
 // Comparison DAG Nodes
 //===----------------------------------------------------------------------===//
 // Compare node: binds AMDILISD::CMP with the SDTIL_Cmp type profile and no
 // node properties.
 def IL_cmp       : SDNode<"AMDILISD::CMP", SDTIL_Cmp>;
 //===----------------------------------------------------------------------===//
 // Call/Return DAG Nodes
 //===----------------------------------------------------------------------===//
 // Call node: AMDILISD::CALL with the SDTIL_Call profile; chained, accepts an
 // optional incoming glue and produces outgoing glue.
 def IL_call      : SDNode<"AMDILISD::CALL", SDTIL_Call,
    [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 // Return node: AMDILISD::RET_FLAG with the SDTNone profile; chained, accepts
 // an optional incoming glue.
 def IL_retflag       : SDNode<"AMDILISD::RET_FLAG", SDTNone,
    [SDNPHasChain, SDNPOptInGlue]>;
 //===--------------------------------------------------------------------===//
 // Instructions
 //===--------------------------------------------------------------------===//
 // Floating point math functions
 // CMOVLOG and MAD use the generic ternary profile, DIV_INF the generic binary
 // profile; none of the three declares node properties.
 // NOTE(review): these sit under the "Floating point math functions" heading,
 // but the SDTIL_Gen* profiles are not visibly float-specific -- confirm.
 def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", SDTIL_GenTernaryOp>;
 def IL_div_inf      : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>;
 def IL_mad          : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>;
 //===----------------------------------------------------------------------===//
 // Integer functions
 //===----------------------------------------------------------------------===//
 // UMUL node: AMDILISD::UMUL with the SDTIntBinOp profile, declared
 // commutative and associative.
 def IL_umul        : SDNode<"AMDILISD::UMUL"    , SDTIntBinOp,
    [SDNPCommutative, SDNPAssociative]>;
 //===----------------------------------------------------------------------===//
 // Vector functions
 //===----------------------------------------------------------------------===//
 // VBUILD node: AMDILISD::VBUILD with the SDTIL_GenVecBuild profile and an
 // explicitly empty property list.
 def IL_vbuild     : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild,
    []>;
--- a/lib/Target/AMDGPU/AMDILOperands.td
+++ b/lib/Target/AMDGPU/AMDILOperands.td
@ -0,0 +1,32 @@
 //===- AMDILOperands.td - AMD IL Operands ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // Custom memory operand
 //===----------------------------------------------------------------------===//
 // i32 memory operand made of two GPRI32 sub-operands (see MIOperandInfo) and
 // printed through the "printMemOperand" method.
 def MEMI32  : Operand<i32> {
    let PrintMethod = "printMemOperand";
    let MIOperandInfo = (ops GPRI32, GPRI32);
 }
 // Call and branch target operand types
 // calltarget is an i32 operand; brtarget is an OtherVT operand.
 def calltarget   : Operand<i32>;
 def brtarget   : Operand<OtherVT>;
 // def v2i8imm : Operand<v2i8>;
 // def v4i8imm : Operand<v4i8>;
 // def v2i16imm : Operand<v2i16>;
 // def v4i16imm : Operand<v4i16>;
 // def v2i32imm : Operand<v2i32>;
 // def v4i32imm : Operand<v4i32>;
 // def v2i64imm : Operand<v2i64>;
 // def v2f32imm : Operand<v2f32>;
 // def v4f32imm : Operand<v4f32>;
 // def v2f64imm : Operand<v2f64>;
--- a/lib/Target/AMDGPU/AMDILPatterns.td
+++ b/lib/Target/AMDGPU/AMDILPatterns.td
@ -0,0 +1,504 @@
 //===- AMDILPatterns.td - AMDIL Target Patterns------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // Store pattern fragments
 //===----------------------------------------------------------------------===//
 // Truncating-store fragments keyed on the in-memory value type.  Each one
 // matches a truncstore whose StoreSDNode reports the named memory VT.
 def truncstorei64 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::i64;
 }]>;
 def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr),
                              (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2i8;
 }]>;
 def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2i16;
 }]>;
 def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2i32;
 }]>;
 def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2i64;
 }]>;
 def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2f32;
 }]>;
 def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v2f64;
 }]>;
 // Four-element vector variants of the memory-VT truncating-store fragments:
 // each matches a truncstore whose StoreSDNode reports the named memory VT.
 def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr),
                              (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v4i8;
 }]>;
 def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v4i16;
 }]>;
 def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v4i32;
 }]>;
 def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr), [{
   const StoreSDNode *Store = cast<StoreSDNode>(N);
   return Store->getMemoryVT() == MVT::v4f32;
 }]>;
 // Plain (non-truncating) store fragments, one per address-space predicate.
 // Each matches a `store` node and accepts it only when the corresponding
 // is*Store helper (defined outside this file) returns true.
 // The fragment root is always a store, so N is known to be a StoreSDNode:
 // cast<> states that (and asserts it in checked builds) instead of handing
 // the helper an unchecked dyn_cast<> result that could be null.
 def global_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def private_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def local_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
        return isLocalStore(cast<StoreSDNode>(N));
 }]>;
 def region_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
        return isRegionStore(cast<StoreSDNode>(N));
 }]>;
 // Scalar truncating stores accepted by isGlobalStore (a helper defined
 // outside this file).  Each fragment layers that predicate on top of the
 // matching truncstore<type> fragment.
 // The fragment root is always a store, so N is known to be a StoreSDNode:
 // cast<> states that (and asserts it in checked builds) instead of handing
 // the helper an unchecked dyn_cast<> result that could be null.
 def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei8 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei16 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei64 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstoref32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstoref64 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 // Vector truncating stores accepted by isGlobalStore (a helper defined
 // outside this file).  Each fragment layers that predicate on top of the
 // matching truncstorev<N><type> fragment.
 // The fragment root is always a store, so N is known to be a StoreSDNode:
 // cast<> states that (and asserts it in checked builds) instead of handing
 // the helper an unchecked dyn_cast<> result that could be null.
 def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i8 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i16 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i64 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2f32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2f64 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i8 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i16 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4f32 node:$val, node:$ptr), [{
        return isGlobalStore(cast<StoreSDNode>(N));
 }]>;
 // Truncating stores accepted by isPrivateStore (a helper defined outside
 // this file): first the generic form over any truncstore, then one fragment
 // per scalar memory type.
 // The fragment root is always a store, so N is known to be a StoreSDNode:
 // cast<> states that (and asserts it in checked builds) instead of handing
 // the helper an unchecked dyn_cast<> result that could be null.
 def private_trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstore node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei8 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei16 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorei64 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstoref32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstoref64 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 // Vector truncating stores accepted by isPrivateStore (a helper defined
 // outside this file).  Each fragment layers that predicate on top of the
 // matching truncstorev<N><type> fragment.
 // The fragment root is always a store, so N is known to be a StoreSDNode:
 // cast<> states that (and asserts it in checked builds) instead of handing
 // the helper an unchecked dyn_cast<> result that could be null.
 def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i8 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i16 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2i64 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2f32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev2f64 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i8 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i16 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4i32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
    (truncstorev4f32 node:$val, node:$ptr), [{
        return isPrivateStore(cast<StoreSDNode>(N));
 }]>;
 // Truncating-store fragments predicated on isLocalStore().
 // Each def wraps exactly one truncstore* operator over ($val, $ptr) and is
 // accepted only when isLocalStore() returns true for the StoreSDNode.
 def local_trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei8 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei16 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_i64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei64 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstoref32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_f64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstoref64 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 // Two-element vector variants.
 def local_v2i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i8 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v2i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i16 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v2i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v2i64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i64 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v2f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2f32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v2f64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2f64 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 // Four-element vector variants.
 def local_v4i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i8 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v4i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i16 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v4i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 def local_v4f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4f32 node:$val, node:$ptr),
     [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); }]>;
 // Truncating-store fragments predicated on isRegionStore().
 // Each def wraps exactly one truncstore* operator over ($val, $ptr) and is
 // accepted only when isRegionStore() returns true for the StoreSDNode.
 def region_trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei8 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei16 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_i64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorei64 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstoref32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_f64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstoref64 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 // Two-element vector variants.
 def region_v2i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i8 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v2i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i16 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v2i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v2i64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2i64 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v2f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2f32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v2f64trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev2f64 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 // Four-element vector variants.
 def region_v4i8trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i8 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v4i16trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i16 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v4i32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4i32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 def region_v4f32trunc_store : PatFrag<
     (ops node:$val, node:$ptr), (truncstorev4f32 node:$val, node:$ptr),
     [{ return isRegionStore(dyn_cast<StoreSDNode>(N)); }]>;
 //===----------------------------------------------------------------------===//
 // Load pattern fragments
 //===----------------------------------------------------------------------===//
 // Global address space loads
 // Each fragment wraps one load flavour and is accepted only when
 // isGlobalLoad() returns true for the LoadSDNode.  The aext fragment wraps
 // extload (any-extend) and the zext fragment wraps zextload, matching both
 // their names and the cp_aext_load / cp_zext_load fragments in this file;
 // the two operators were previously swapped.
 def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def global_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def global_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 // Private address space loads
 // Each fragment wraps one load flavour and is accepted only when
 // isPrivateLoad() returns true for the LoadSDNode.  The aext fragment wraps
 // extload (any-extend) and the zext fragment wraps zextload, matching both
 // their names and the cp_aext_load / cp_zext_load fragments in this file;
 // the two operators were previously swapped.
 def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
     return isPrivateLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
     return isPrivateLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def private_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
     return isPrivateLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def private_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
     return isPrivateLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 // Local address space loads
 // Each fragment wraps one load flavour and is accepted only when
 // isLocalLoad() returns true for the LoadSDNode.  The aext fragment wraps
 // extload (any-extend) and the zext fragment wraps zextload, matching both
 // their names and the cp_aext_load / cp_zext_load fragments in this file;
 // the two operators were previously swapped.
 def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
     return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
     return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def local_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
     return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def local_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
     return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 // Region address space loads
 // Each fragment wraps one load flavour and is accepted only when
 // isRegionLoad() returns true for the LoadSDNode.  The aext fragment wraps
 // extload (any-extend) and the zext fragment wraps zextload, matching both
 // their names and the cp_aext_load / cp_zext_load fragments in this file;
 // the two operators were previously swapped.
 def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
     return isRegionLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
     return isRegionLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def region_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
     return isRegionLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 def region_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
     return isRegionLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 // Constant address space loads
 // Each fragment wraps one load flavour and is accepted only when
 // isConstantLoad(..., -1) returns true for the LoadSDNode.  The aext fragment
 // wraps extload (any-extend) and the zext fragment wraps zextload, matching
 // both their names and the cp_aext_load / cp_zext_load fragments in this
 // file; the two operators were previously swapped.
 def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
     return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
 def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
     return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
 def constant_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
     return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
 def constant_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
     return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
 // Constant pool loads
 // One fragment per load flavour (plain, sign-, zero- and any-extending);
 // each is accepted only when isCPLoad() returns true for the LoadSDNode.
 def cp_load : PatFrag<
     (ops node:$ptr), (load node:$ptr),
     [{ return isCPLoad(dyn_cast<LoadSDNode>(N)); }]>;
 def cp_sext_load : PatFrag<
     (ops node:$ptr), (sextload node:$ptr),
     [{ return isCPLoad(dyn_cast<LoadSDNode>(N)); }]>;
 def cp_zext_load : PatFrag<
     (ops node:$ptr), (zextload node:$ptr),
     [{ return isCPLoad(dyn_cast<LoadSDNode>(N)); }]>;
 def cp_aext_load : PatFrag<
     (ops node:$ptr), (extload node:$ptr),
     [{ return isCPLoad(dyn_cast<LoadSDNode>(N)); }]>;
 //===----------------------------------------------------------------------===//
 // Complex addressing mode patterns
 //===----------------------------------------------------------------------===//
 // Two-operand address patterns.  The i32 forms are selected by SelectADDR
 // and the i64 forms by SelectADDR64; the "F" variants additionally list
 // frameindex as a root node.
 def ADDR    : ComplexPattern<i32, 2, "SelectADDR",   [],           []>;
 def ADDRF   : ComplexPattern<i32, 2, "SelectADDR",   [frameindex], []>;
 def ADDR64  : ComplexPattern<i64, 2, "SelectADDR64", [],           []>;
 def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
 //===----------------------------------------------------------------------===//
 // Conditional Instruction Pattern Leafs
 //===----------------------------------------------------------------------===//
 // Pattern leaf that carries an i32 immediate identifying a condition code.
 class IL_CC_Op<int N> : PatLeaf<(i32 N)>;
 // Condition codes 0 through 13.  Names have the shape IL_CC_<type>_<cond>.
 // NOTE(review): the type letters presumably mean D = double, F = float,
 // I = signed integer, U = unsigned integer -- confirm against the users.
 def IL_CC_D_EQ : IL_CC_Op<0>;
 def IL_CC_D_GE : IL_CC_Op<1>;
 def IL_CC_D_LT : IL_CC_Op<2>;
 def IL_CC_D_NE : IL_CC_Op<3>;
 def IL_CC_F_EQ : IL_CC_Op<4>;
 def IL_CC_F_GE : IL_CC_Op<5>;
 def IL_CC_F_LT : IL_CC_Op<6>;
 def IL_CC_F_NE : IL_CC_Op<7>;
 def IL_CC_I_EQ : IL_CC_Op<8>;
 def IL_CC_I_GE : IL_CC_Op<9>;
 def IL_CC_I_LT : IL_CC_Op<10>;
 def IL_CC_I_NE : IL_CC_Op<11>;
 def IL_CC_U_GE : IL_CC_Op<12>;
 def IL_CC_U_LT : IL_CC_Op<13>;
 // Pseudo IL comparison instructions that aren't natively supported
 // Codes 14 through 21: greater-than / less-or-equal forms.
 def IL_CC_F_GT : IL_CC_Op<14>;
 def IL_CC_U_GT : IL_CC_Op<15>;
 def IL_CC_I_GT : IL_CC_Op<16>;
 def IL_CC_D_GT : IL_CC_Op<17>;
 def IL_CC_F_LE : IL_CC_Op<18>;
 def IL_CC_U_LE : IL_CC_Op<19>;
 def IL_CC_I_LE : IL_CC_Op<20>;
 def IL_CC_D_LE : IL_CC_Op<21>;
 // Codes 22 through 27: IL_CC_F_U* forms.
 def IL_CC_F_UNE : IL_CC_Op<22>;
 def IL_CC_F_UEQ : IL_CC_Op<23>;
 def IL_CC_F_ULT : IL_CC_Op<24>;
 def IL_CC_F_UGT : IL_CC_Op<25>;
 def IL_CC_F_ULE : IL_CC_Op<26>;
 def IL_CC_F_UGE : IL_CC_Op<27>;
 // Codes 28 through 33: IL_CC_F_O* forms.
 def IL_CC_F_ONE : IL_CC_Op<28>;
 def IL_CC_F_OEQ : IL_CC_Op<29>;
 def IL_CC_F_OLT : IL_CC_Op<30>;
 def IL_CC_F_OGT : IL_CC_Op<31>;
 def IL_CC_F_OLE : IL_CC_Op<32>;
 def IL_CC_F_OGE : IL_CC_Op<33>;
 // Codes 34 through 39: IL_CC_D_U* forms.
 def IL_CC_D_UNE : IL_CC_Op<34>;
 def IL_CC_D_UEQ : IL_CC_Op<35>;
 def IL_CC_D_ULT : IL_CC_Op<36>;
 def IL_CC_D_UGT : IL_CC_Op<37>;
 def IL_CC_D_ULE : IL_CC_Op<38>;
 def IL_CC_D_UGE : IL_CC_Op<39>;
 // Codes 40 through 45: IL_CC_D_O* forms.
 // IL_CC_D_ONE is 40: every code in this list is unique and consecutive, and
 // 30 already belongs to IL_CC_F_OLT, so reusing it here would make the two
 // leaves indistinguishable.
 def IL_CC_D_ONE : IL_CC_Op<40>;
 def IL_CC_D_OEQ : IL_CC_Op<41>;
 def IL_CC_D_OLT : IL_CC_Op<42>;
 def IL_CC_D_OGT : IL_CC_Op<43>;
 def IL_CC_D_OLE : IL_CC_Op<44>;
 def IL_CC_D_OGE : IL_CC_Op<45>;
 // Codes 46 and 47: IL_CC_U_* equality forms.
 def IL_CC_U_EQ : IL_CC_Op<46>;
 def IL_CC_U_NE : IL_CC_Op<47>;
 // Codes 48 through 51: the _O / _UO forms for F and D.
 def IL_CC_F_O  : IL_CC_Op<48>;
 def IL_CC_D_O  : IL_CC_Op<49>;
 def IL_CC_F_UO : IL_CC_Op<50>;
 def IL_CC_D_UO : IL_CC_Op<51>;
 // Codes 52 through 57: IL_CC_L_* forms.
 def IL_CC_L_LE : IL_CC_Op<52>;
 def IL_CC_L_GE : IL_CC_Op<53>;
 def IL_CC_L_EQ : IL_CC_Op<54>;
 def IL_CC_L_NE : IL_CC_Op<55>;
 def IL_CC_L_LT : IL_CC_Op<56>;
 def IL_CC_L_GT : IL_CC_Op<57>;
 // Codes 58 through 63: IL_CC_UL_* forms.
 def IL_CC_UL_LE : IL_CC_Op<58>;
 def IL_CC_UL_GE : IL_CC_Op<59>;
 def IL_CC_UL_EQ : IL_CC_Op<60>;
 def IL_CC_UL_NE : IL_CC_Op<61>;
 def IL_CC_UL_LT : IL_CC_Op<62>;
 def IL_CC_UL_GT : IL_CC_Op<63>;
--- a/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
+++ b/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
--- a/lib/Target/AMDGPU/AMDILProfiles.td
+++ b/lib/Target/AMDGPU/AMDILProfiles.td
@ -0,0 +1,174 @@
 //===- AMDILProfiles.td - AMD IL Profiles ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 // These are used for custom selection dag type profiles
 //===----------------------------------------------------------------------===//
 // Custom Selection DAG Type Profiles
 //===----------------------------------------------------------------------===//
 // SDTCisDP - The specified operand has double type
 // Tablegen needs to be hacked to get this constraint to work
 //class SDTCisDP<int OpNum> : SDTypeConstraint<OpNum>;
 //===----------------------------------------------------------------------===//
 // Generic Profile Types
 //===----------------------------------------------------------------------===//
 // One result, one operand, both of the same type.
 def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
 // One result, two operands, all of the same type.
 def SDTIL_GenBinaryOp
     : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 // One result, three operands, all of the same type.
 def SDTIL_GenTernaryOp
     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
                            SDTCisSameAs<2, 3>]>;
 // Operand 1 is an integer; the result and operands 2 and 3 share a type.
 def SDTIL_GenCMovLog
     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>,
                            SDTCisInt<1>]>;
 // The operand is the element type of the vector result.
 def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [SDTCisEltOfVec<1, 0>]>;
 // The result is the element type of vector operand 1; operand 2 is i32.
 def SDTIL_GenVecExtract
     : SDTypeProfile<1, 2, [SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32>]>;
 // Result and operand 1 are the same vector type, operand 2 is its element
 // type, and operands 3 and 4 are i32.
 def SDTIL_GenVecInsert
     : SDTypeProfile<1, 4, [SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>,
                            SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
 // Result and operand 1 share a type; operand 2 is i32.
 def SDTIL_GenVecShuffle
     : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
 // The two operands share a type; the result type is unconstrained here.
 def SDTIL_GenVecConcat : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
 //===----------------------------------------------------------------------===//
 // Conversion Profile Types
 //===----------------------------------------------------------------------===//
 // d2f: floating-point result that is smaller than the floating-point operand.
 def SDTIL_DPToFPOp
     : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>,
                            SDTCisOpSmallerThanOp<0, 1>]>;
 // Integer result; the operand type is unconstrained.
 def SDTIL_AnyToInt : SDTypeProfile<1, 1, [SDTCisInt<0>]>;
 // Integer operand; the result type is unconstrained.
 def SDTIL_IntToAny : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
 // One result and one operand with no type constraints.
 def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>;
 //===----------------------------------------------------------------------===//
 // Scalar Profile Types
 //===----------------------------------------------------------------------===//
 // Add instruction patterns to handle offsets of memory operations.
 // "ri": operand 1 is a pointer, operand 2 has the integer result type.
 def SDTIL_AddAddrri
     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2>]>;
 // "ir": operand 2 is a pointer, operand 1 has the integer result type.
 def SDTIL_AddAddrir
     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1>]>;
 // i64 result from two i32 operands.
 def SDTIL_LCreate
     : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                            SDTCisSameAs<1, 2>]>;
 // v2i64 result from two v2i32 operands.
 def SDTIL_LCreate2
     : SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>,
                            SDTCisSameAs<1, 2>]>;
 // i32 result from one i64 operand.
 def SDTIL_LComp
     : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i64>]>;
 // v2i32 result from one v2i64 operand.
 def SDTIL_LComp2
     : SDTypeProfile<1, 1, [SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64>]>;
 // f64 result from two i32 operands.
 def SDTIL_DCreate
     : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
                            SDTCisSameAs<1, 2>]>;
 // i32 result from one f64 operand.
 def SDTIL_DComp
     : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, f64>]>;
 // v2f64 result from two v2i32 operands.
 def SDTIL_DCreate2
     : SDTypeProfile<1, 2, [SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>,
                            SDTCisSameAs<1, 2>]>;
 // v2i32 result from one v2f64 operand.
 def SDTIL_DComp2
     : SDTypeProfile<1, 1, [SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64>]>;
 //===----------------------------------------------------------------------===//
 // Flow Control Profile Types
 //===----------------------------------------------------------------------===//
 // Profile for a normal call: no results, one i32 operand.
 def SDTIL_Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
 // Conditional branch: no results, two operands, operand 0 of type OtherVT.
 // NOTE(review): the earlier comment described the "second and third"
 // operands as basic blocks, yet only two operands are declared -- confirm.
 def SDTIL_BRCond : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>]>;
 // Comparison: operand 1 is i32; the result and operands 2 and 3 share a type.
 def SDTIL_Cmp
     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>,
                            SDTCisVT<1, i32>]>;
 //===----------------------------------------------------------------------===//
 // Call Sequence Profiles
 //===----------------------------------------------------------------------===//
 // Call-sequence start: a single i32 operand.
 def SDTIL_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
 // Call-sequence end: two i32 operands.
 def SDTIL_CallSeqEnd
     : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
 //===----------------------------------------------------------------------===//
 // Image Operation Profiles
 //===----------------------------------------------------------------------===//
 // v4i32 result; operands are a pointer, an i32 and a v4f32.
 def SDTIL_ImageRead : SDTypeProfile<1, 3, [
     SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>
     ]>;
 // No result; operands are a pointer, a v2i32 and a v4i32.
 def SDTIL_ImageWrite : SDTypeProfile<0, 3, [
     SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>
     ]>;
 // No result; operands are a pointer and two v4i32 values.
 def SDTIL_ImageWrite3D : SDTypeProfile<0, 3, [
     SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
     ]>;
 // v4i32 result from a single pointer operand.
 def SDTIL_ImageInfo : SDTypeProfile<1, 1, [
     SDTCisVT<0, v4i32>, SDTCisPtrTy<1>
     ]>;
 //===----------------------------------------------------------------------===//
 // Atomic Operation Profiles
 //===----------------------------------------------------------------------===//
 // Forms without a result: a pointer followed by one, two or three i32
 // operands.
 def SDTIL_UniAtomNoRet
     : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
 def SDTIL_BinAtomNoRet
     : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
                            SDTCisVT<2, i32>]>;
 def SDTIL_TriAtomNoRet
     : SDTypeProfile<0, 4, [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
                            SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 // Forms with an i32 result: a pointer followed by one, two or three i32
 // operands.
 def SDTIL_UniAtom
     : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                            SDTCisVT<2, i32>]>;
 def SDTIL_BinAtom
     : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                            SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 def SDTIL_TriAtom
     : SDTypeProfile<1, 4, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                            SDTCisVT<2, i32>, SDTCisVT<3, i32>,
                            SDTCisVT<4, i32>]>;
 // f32-operand forms, with (i32 result) and without a result.
 def SDTIL_BinAtomFloat
     : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                            SDTCisVT<2, f32>, SDTCisVT<3, f32>]>;
 def SDTIL_BinAtomNoRetFloat
     : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, f32>,
                            SDTCisVT<2, f32>]>;
 // i32 result from a single pointer operand.
 def SDTIL_Append
     : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
--- a/lib/Target/AMDGPU/AMDILRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDILRegisterInfo.cpp
@ -0,0 +1,162 @@
 //===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains the AMDIL implementation of the TargetRegisterInfo class.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDILRegisterInfo.h"
 #include "AMDIL.h"
 #include "AMDILInstrInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 // Binds the register info to its target machine and instruction info and
 // starts both stack-offset counters at zero.
 AMDILRegisterInfo::AMDILRegisterInfo(TargetMachine &tm,
                                      const TargetInstrInfo &tii)
   : AMDILGenRegisterInfo(0), // RA???
     TM(tm), TII(tii), baseOffset(0), nextFuncOffset(0)
 {
 }
 // Returns the callee-saved register list, which currently holds nothing but
 // the terminating 0 entry.
 const uint16_t *
 AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
 {
   // TODO: Does IL need to actually have any callee saved regs?
   // I don't think we do since we can just use sequential registers
   // Maybe this would be easier if every function call was inlined first
   // and then there would be no callee issues to deal with
   //TODO(getCalleeSavedRegs);
   static const uint16_t NoCalleeSavedRegs[1] = { 0 };
   return NoCalleeSavedRegs;
 }
 // Returns the set of reserved registers: the first 256 register numbers, or
 // all of them when the target defines fewer than 256.
 BitVector
 AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
 {
   const unsigned int NumRegs = getNumRegs();
   BitVector Reserved(NumRegs);
   // The low-numbered registers are reserved because they are the ones passed
   // in live-in/live-out and therefore cannot be killed by the scheduler.
   // This works around a bug that was causing the linearscan register
   // allocator to kill registers inside of the function that were also passed
   // as LiveIn registers.
   //
   // BitVector::set(Idx) does not grow the vector, so the count is clamped to
   // the vector's size to avoid writing past the end when there are fewer
   // than 256 registers.
   const unsigned int NumReserved = NumRegs < 256 ? NumRegs : 256;
   for (unsigned int Reg = 0; Reg < NumReserved; ++Reg) {
     Reserved.set(Reg);
   }
   return Reserved;
 }
 // Returns the allocatable set, which is always empty: a vector of
 // getNumRegs() bits with none set.  MF and RC are not consulted.
 BitVector
 AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF,
     const TargetRegisterClass *RC = NULL) const
 {
   // The constructor already zero-initialises every bit.  BitVector::clear()
   // must not be called here: it shrinks the vector to size 0 instead of
   // clearing bits, which would leave callers indexing by register number
   // with an out-of-range access.
   BitVector Allocatable(getNumRegs());
   return Allocatable;
 }
 // Returns the register classes of the callee-saved registers, which
 // currently holds nothing but the terminating null entry.
 const TargetRegisterClass *const *
 AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
 {
   // TODO: Keep in sync with getCalleeSavedRegs
   //TODO(getCalleeSavedRegClasses);
   static const TargetRegisterClass *const NoCalleeSavedClasses[1] = { 0 };
   return NoCalleeSavedClasses;
 }
 // Drops the call-frame pseudo instruction at I from MBB; nothing is emitted
 // in its place.
 void
 AMDILRegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
                                                  MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I)
                                                  const
 {
   MBB.erase(I);
 }
 // Intended behaviour: for each frame index found, the offset in the stack
 // that is being pushed back into the global buffer is stored; that offset is
 // copied into a new register and the frame index is replaced with that
 // register.
 //
 // Not implemented yet: the body only asserts.
 void
 AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                        int SPAdj, RegScavenger *RS) const
 {
   assert(!"Implement");
 }
 // Advances nextFuncOffset by the stack space of every frame object in MF.
 // This tracks how much stack the current function uses so that the offset
 // can be set to the end of the stack and other function calls will not
 // overwrite any stack variables.
 void
 AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
     MachineFunction &MF) const
 {
   //TODO(processFunctionBeforeFrameFinalized);
   // baseOffset = nextFuncOffset;
   MachineFrameInfo *MFI = MF.getFrameInfo();
   const uint32_t NumObjects = MFI->getNumObjects();
   for (uint32_t Idx = 0; Idx < NumObjects; ++Idx) {
     const int64_t ObjSize = MFI->getObjectSize(Idx);
     // Objects whose size is a multiple of four (and larger than one) are
     // counted at their own size; everything else is counted as 16.
     const bool UseOwnSize = (ObjSize % 4) == 0 && ObjSize > 1;
     nextFuncOffset += UseOwnSize ? ObjSize : 16;
   }
 }
 // Debug information query: returns AMDGPU::RA.
 unsigned int AMDILRegisterInfo::getRARegister() const
 {
   return AMDGPU::RA;
 }
 // Debug information query: returns AMDGPU::FP regardless of MF.
 unsigned int
 AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
 {
   return AMDGPU::FP;
 }
 // Exception handling query.  No exception register is defined: this asserts
 // and otherwise returns 0.
 unsigned int AMDILRegisterInfo::getEHExceptionRegister() const
 {
   assert(0 && "What is the exception register");
   return 0;
 }
 // Exception handling query.  No handler register is defined: this asserts
 // and otherwise returns 0.
 unsigned int AMDILRegisterInfo::getEHHandlerRegister() const
 {
   assert(0 && "What is the exception handler register");
   return 0;
 }
 // Returns the distance between the two tracked stack offsets
 // (nextFuncOffset minus baseOffset).
 int64_t AMDILRegisterInfo::getStackSize() const
 {
   const int64_t StackBytes = nextFuncOffset - baseOffset;
   return StackBytes;
 }
 #define GET_REGINFO_TARGET_DESC
 #include "AMDGPUGenRegisterInfo.inc"
--- a/lib/Target/AMDGPU/AMDILRegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDILRegisterInfo.h
@ -0,0 +1,95 @@
 //===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file contains the AMDIL implementation of the TargetRegisterInfo class.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDILREGISTERINFO_H_
 #define AMDILREGISTERINFO_H_
 #include "llvm/Target/TargetRegisterInfo.h"
 #define GET_REGINFO_HEADER
 #include "AMDGPUGenRegisterInfo.inc"
 // See header file for explanation
 namespace llvm
 {
  class TargetInstrInfo;
  class Type;
  /// DWARFFlavour - Flavour of dwarf regnumbers.  Only one flavour,
  /// AMDIL_Generic (0), is defined.
  namespace DWARFFlavour {
    enum { AMDIL_Generic = 0 };
  }
  /// AMDIL implementation of the register-info interface, layered on the
  /// TableGen-generated AMDILGenRegisterInfo.
  struct AMDILRegisterInfo : public AMDILGenRegisterInfo
  {
    TargetMachine &TM;
    const TargetInstrInfo &TII;

    AMDILRegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);

    /// Code Generation virtual methods...
    const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;

    const TargetRegisterClass *const *
    getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;

    BitVector getReservedRegs(const MachineFunction &MF) const;

    BitVector getAllocatableSet(const MachineFunction &MF,
                                const TargetRegisterClass *RC) const;

    void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                       MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I) const;

    void eliminateFrameIndex(MachineBasicBlock::iterator II,
                             int SPAdj, RegScavenger *RS = NULL) const;

    void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;

    // Debug information queries.
    unsigned int getRARegister() const;
    unsigned int getFrameRegister(const MachineFunction &MF) const;

    // Exception handling queries.
    unsigned int getEHExceptionRegister() const;
    unsigned int getEHHandlerRegister() const;

    /// Difference between the two stack offsets tracked below.
    int64_t getStackSize() const;

    /// Register class handed to the CFG structurizer; always GPRI32,
    /// whatever VT is.
    virtual const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const
    {
      return &AMDGPU::GPRI32RegClass;
    }

    private:
    // Stack-offset bookkeeping; mutable because it is updated from const
    // member functions.
    mutable int64_t baseOffset;
    mutable int64_t nextFuncOffset;
  };
 } // end namespace llvm
 #endif // AMDILREGISTERINFO_H_
--- a/lib/Target/AMDGPU/AMDILRegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDILRegisterInfo.td
@ -0,0 +1,110 @@
 //===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 //  Declarations that describe the AMDIL register file
 //
 //===----------------------------------------------------------------------===//
 // Base class of every AMDIL register: a Register named n in the AMDGPU
 // namespace that also records the 16-bit number num in its Value field.
 class AMDILReg<bits<16> num, string n> : Register<n> {
   field bits<16> Value = num;
   let Namespace = "AMDGPU";
 }
 // General registers r1 through r20; each register's DWARF number equals its
 // register number.
 // Since the swizzle is added based on the register class, we can leave it
 // off here and just specify different registers for different register classes
 def R1  : AMDILReg<1,  "r1">,  DwarfRegNum<[1]>;
 def R2  : AMDILReg<2,  "r2">,  DwarfRegNum<[2]>;
 def R3  : AMDILReg<3,  "r3">,  DwarfRegNum<[3]>;
 def R4  : AMDILReg<4,  "r4">,  DwarfRegNum<[4]>;
 def R5  : AMDILReg<5,  "r5">,  DwarfRegNum<[5]>;
 def R6  : AMDILReg<6,  "r6">,  DwarfRegNum<[6]>;
 def R7  : AMDILReg<7,  "r7">,  DwarfRegNum<[7]>;
 def R8  : AMDILReg<8,  "r8">,  DwarfRegNum<[8]>;
 def R9  : AMDILReg<9,  "r9">,  DwarfRegNum<[9]>;
 def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
 def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
 def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
 def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
 def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
 def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
 def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
 def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
 def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
 def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
 def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
 // All registers between 1000 and 1024 are reserved and cannot be used
 // unless commented in this section
 // r1021-r1025 are used to dynamically calculate the
 //   local/group/thread/region/region_local ID's
 // r1020 is used to hold the frame index for local arrays
 // r1019 is used to hold the dynamic stack allocation pointer
 // r1018 is used as a temporary register for handwritten code
 // r1017 is used as a temporary register for handwritten code
 // r1016 is used as a temporary register for load/store code
 // r1015 is used as a temporary register for data segment offset
 // r1014 is used as a temporary register for store code
 // r1013 is used as the section data pointer register
 // r1012-r1010 and r1001-r1008 are used for temporary I/O registers
 // r1009 is used as the frame pointer register
 // r999 is used as the mem register.
 // r998 is used as the return address register.
 // NOTE(review): r997 (FP, defined last in this section) has no entry in the
 // list above -- confirm its role.

 // r1020-r1025 are currently not defined.
 //def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
 //def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
 //def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
 //def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
 //def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
 //def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
 def SP    : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
 def T1    : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
 def T2    : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
 def T3    : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
 def T4    : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
 def T5    : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
 def SDP   : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
 def R1012 : AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
 def R1011 : AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
 def R1010 : AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
 def DFP   : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
 def R1008 : AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
 def R1007 : AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
 def R1006 : AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
 def R1005 : AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
 def R1004 : AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
 def R1003 : AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
 def R1002 : AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
 def R1001 : AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
 def MEM   : AMDILReg<999,  "mem">,   DwarfRegNum<[999]>;
 def RA    : AMDILReg<998,  "r998">,  DwarfRegNum<[998]>;
 def FP    : AMDILReg<997,  "r997">,  DwarfRegNum<[997]>;
 // i16 register class with 16-bit alignment.  The default order lists R1-R20
 // followed by the special registers; the single alternative order holds
 // R1-R20 only, and AltOrderSelect always returns 1.
 def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
     (add (sequence "R%u", 1, 20),
          RA, SP, T1, T2, T3, T4, T5, SDP,
          R1010, R1011,
          R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008,
          MEM, R1012)>
 {
   let AltOrders = [(add (sequence "R%u", 1, 20))];
   let AltOrderSelect = [{ return 1; }];
 }
 // i32 register class with 32-bit alignment.  The default order lists R1-R20
 // followed by the special registers; the single alternative order holds
 // R1-R20 only, and AltOrderSelect always returns 1.
 def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
     (add (sequence "R%u", 1, 20),
          RA, SP, T1, T2, T3, T4, T5, SDP,
          R1010, R1011,
          R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008,
          MEM, R1012)>
 {
   let AltOrders = [(add (sequence "R%u", 1, 20))];
   let AltOrderSelect = [{ return 1; }];
 }
 // f32 register class with 32-bit alignment.  The default order lists R1-R20
 // followed by the special registers; the single alternative order holds
 // R1-R20 only, and AltOrderSelect always returns 1.
 def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
     (add (sequence "R%u", 1, 20),
          RA, SP, T1, T2, T3, T4, T5, SDP,
          R1010, R1011,
          R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008,
          MEM, R1012)>
 {
   let AltOrders = [(add (sequence "R%u", 1, 20))];
   let AltOrderSelect = [{ return 1; }];
 }
--- a/lib/Target/AMDGPU/AMDILSIDevice.cpp
+++ b/lib/Target/AMDGPU/AMDILSIDevice.cpp
@ -0,0 +1,49 @@
 //===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 #include "AMDILSIDevice.h"
 #include "AMDILEvergreenDevice.h"
 #include "AMDILNIDevice.h"
 #include "AMDILSubtarget.h"
 using namespace llvm;
 // Constructs an SI device; the subtarget pointer is simply forwarded to the
 // AMDILEvergreenDevice base class.
 AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST) : AMDILEvergreenDevice(ST) {}
 // Nothing to release at this level of the hierarchy.
 AMDILSIDevice::~AMDILSIDevice() {}
 // Returns MAX_LDS_SIZE_900 when the LocalMem capability is handled by
 // hardware (per usesHardware()), and 0 otherwise.
 size_t AMDILSIDevice::getMaxLDSSize() const {
  if (!usesHardware(AMDILDeviceInfo::LocalMem))
    return 0;
  return MAX_LDS_SIZE_900;
 }
 // Identifies this device's generation as AMDILDeviceInfo::HD7XXX.
 uint32_t AMDILSIDevice::getGeneration() const {
  return AMDILDeviceInfo::HD7XXX;
 }
 // Returns the data layout string used for SI devices.  The string declares
 // a little-endian target ("e") with 64-bit pointers ("p:64:64:64").
 std::string AMDILSIDevice::getDataLayout() const {
    const char *const Layout =
      "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
      "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
      "-n8:16:32:64";
    return std::string(Layout);
 }
--- a/lib/Target/AMDGPU/AMDILSIDevice.h
+++ b/lib/Target/AMDGPU/AMDILSIDevice.h
@ -0,0 +1,45 @@
 //===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // Interface for the subtarget data classes.
 //
 //===---------------------------------------------------------------------===//
 // This file will define the interface that each generation needs to
 // implement in order to correctly answer queries on the capabilities of the
 // specific hardware.
 //===---------------------------------------------------------------------===//
 #ifndef _AMDILSIDEVICE_H_
 #define _AMDILSIDEVICE_H_
 #include "AMDILEvergreenDevice.h"
 #include "AMDILSubtarget.h"
 namespace llvm {
  class AMDILSubtarget;
 //===---------------------------------------------------------------------===//
 // SI generation of devices and their respective sub classes
 //===---------------------------------------------------------------------===//
 // The AMDILSIDevice is the base class for all Southern Islands series of
 // cards. It is very similar to the AMDILEvergreenDevice, with the major
 // exception being differences in wavefront size and hardware capabilities.
 // The SI devices all have 64-wide wavefronts and also add support for signed
 // 24-bit integer operations.
  // Device description for SI parts; derives from AMDILEvergreenDevice and
  // supplies its own LDS size, generation and data layout answers.
  class AMDILSIDevice : public AMDILEvergreenDevice {
    public:
      // The subtarget pointer is passed on to AMDILEvergreenDevice.
      AMDILSIDevice(AMDILSubtarget*);
      virtual ~AMDILSIDevice();
      // MAX_LDS_SIZE_900 if LocalMem is a hardware capability, else 0.
      virtual size_t getMaxLDSSize() const;
      // Always AMDILDeviceInfo::HD7XXX.
      virtual uint32_t getGeneration() const;
      // Data layout string with 64-bit pointers.
      virtual std::string getDataLayout() const;
    protected:
  }; // AMDILSIDevice
 } // namespace llvm
 #endif // _AMDILSIDEVICE_H_
--- a/lib/Target/AMDGPU/AMDILSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDILSubtarget.cpp
@ -0,0 +1,178 @@
 //===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file implements the AMD IL specific subclass of TargetSubtarget.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDILSubtarget.h"
 #include "AMDIL.h"
 #include "AMDILDevices.h"
 #include "AMDILUtilityFunctions.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/SubtargetFeature.h"
 using namespace llvm;
 #define GET_SUBTARGETINFO_ENUM
 #define GET_SUBTARGETINFO_CTOR
 #define GET_SUBTARGETINFO_TARGET_DESC
 #include "AMDGPUGenSubtargetInfo.inc"
 // Builds the subtarget from the target triple, CPU name and feature string.
 //
 // The comma-separated feature string FS is scanned for entries that are
 // consumed here instead of being handed to the generated feature parser:
 //   "+mwgs-<n>-<a>[-<b>[-<c>]]"  sets up to three default sizes (n is
 //                                 clamped to 3; missing values are ignored)
 //   "+cal=<version>"             sets the CAL version
 //   "test" (debug builds only)   switches to the "kauai"/"test" device
 // Every other entry is forwarded, comma-joined, to ParseSubtargetFeatures().
 // Finally the device object for the chosen name is created.
 AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ),
  mGM(NULL), mKM(NULL), mDevice(NULL), mIs32on64bit(false), mDumpCode(false)
 {
  memset(CapsOverride, 0, sizeof(CapsOverride));
  // The device name is always taken from the CPU argument.
  std::string GPU = CPU.str();
  mIs64bit = false;
  mVersion = 0;
  SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
  SplitString(FS, Features, ",");
  mDefaultSize[0] = 64;
  mDefaultSize[1] = 1;
  mDefaultSize[2] = 1;
  std::string newFeatures = "";
 #if defined(_DEBUG) || defined(DEBUG)
  bool useTest = false;
 #endif
  for (size_t x = 0; x < Features.size(); ++x) {
    if (Features[x].startswith("+mwgs")) {
      SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
      SplitString(Features[x], sizes, "-");
      // sizes[0] is "+mwgs", sizes[1] the dimension count and sizes[2..]
      // the per-dimension values.  A malformed entry may be missing any of
      // them, so never index past what was actually parsed.
      if (sizes.size() > 1) {
        size_t mDim = ::atoi(sizes[1].data());
        if (mDim > 3) {
          mDim = 3;
        }
        for (size_t y = 0; y < mDim && y + 2 < sizes.size(); ++y) {
          mDefaultSize[y] = ::atoi(sizes[y+2].data());
        }
      }
 #if defined(_DEBUG) || defined(DEBUG)
    } else if (!Features[x].compare("test")) {
      useTest = true;
 #endif
    } else if (Features[x].startswith("+cal")) {
      SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
      SplitString(Features[x], version, "=");
      // "+cal" without "=<n>" carries no version; leave mVersion untouched.
      if (version.size() > 1) {
        mVersion = ::atoi(version[1].data());
      }
    } else {
      // Separate on "already have something", not on the loop index:
      // earlier entries may have been consumed above, and keying on x would
      // then produce a leading comma.
      if (!newFeatures.empty()) newFeatures += ',';
      newFeatures += Features[x];
    }
  }
  // If we don't have a version then set it to
  // -1 which enables everything. This is for
  // offline devices.
  if (!mVersion) {
    mVersion = (uint32_t)-1;
  }
  // A default size of 0 is replaced by 1.
  for (int x = 0; x < 3; ++x) {
    if (!mDefaultSize[x]) {
      mDefaultSize[x] = 1;
    }
  }
 #if defined(_DEBUG) || defined(DEBUG)
  if (useTest) {
    GPU = "kauai";
  }
 #endif
  ParseSubtargetFeatures(GPU, newFeatures);
 #if defined(_DEBUG) || defined(DEBUG)
  if (useTest) {
    GPU = "test";
  }
 #endif
  mDevName = GPU;
  mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
 }
 // Releases the device object obtained in the constructor.
 AMDILSubtarget::~AMDILSubtarget() {
  delete mDevice;
 }
 // Returns the override flag stored for capability `caps`.  The index is
 // only range-checked by an assertion.
 bool AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const {
  assert(caps < AMDILDeviceInfo::MaxNumberCapabilities
         && "Caps index is out of bounds!");
  return CapsOverride[caps];
 }
 // Returns the stored 64-bit flag.
 bool AMDILSubtarget::is64bit() const {
  return mIs64bit;
 }
 // Always reports false.
 bool AMDILSubtarget::isTargetELF() const {
  return false;
 }
 // Returns the default size recorded for dimension `dim`.
 // mDefaultSize holds exactly three entries (indices 0..2); any other index
 // yields 1 rather than reading past the end of the array.
 size_t
 AMDILSubtarget::getDefaultSize(uint32_t dim) const
 {
  if (dim >= 3) {
    return 1;
  }
  return mDefaultSize[dim];
 }
 // Returns the CAL version chosen at construction time.
 uint32_t AMDILSubtarget::calVersion() const {
  return mVersion;
 }
 // Returns the global manager pointer last stored by setGlobalManager().
 AMDILGlobalManager *AMDILSubtarget::getGlobalManager() const {
  return mGM;
 }
 // Stores the global manager pointer.  Callable on a const subtarget because
 // mGM is declared mutable.
 void AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const {
  mGM = gm;
 }
 // Returns the kernel manager pointer last stored by setKernelManager().
 AMDILKernelManager *AMDILSubtarget::getKernelManager() const {
  return mKM;
 }
 // Stores the kernel manager pointer.  Callable on a const subtarget because
 // mKM is declared mutable.
 void AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const {
  mKM = km;
 }
 // Returns the data layout string.  When a device object exists its layout
 // is used; otherwise a built-in layout with 32-bit pointers is returned.
 std::string AMDILSubtarget::getDataLayout() const {
    if (mDevice) {
        return mDevice->getDataLayout();
    }
    const char *const FallbackLayout =
        "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
        "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
        "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
        "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
        "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64";
    return std::string(FallbackLayout);
 }
 // Returns a copy of the device name recorded by the constructor.
 std::string AMDILSubtarget::getDeviceName() const {
  return mDevName;
 }
 // Returns the device object; the subtarget keeps ownership (it is deleted
 // in the destructor).
 const AMDILDevice *AMDILSubtarget::device() const {
  return mDevice;
 }
--- a/lib/Target/AMDGPU/AMDILSubtarget.h
+++ b/lib/Target/AMDGPU/AMDILSubtarget.h
@ -0,0 +1,76 @@
 //=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file declares the AMDIL specific subclass of TargetSubtarget.
 //
 //===----------------------------------------------------------------------===//
 #ifndef _AMDILSUBTARGET_H_
 #define _AMDILSUBTARGET_H_
 #include "AMDILDevice.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <cstdlib>
 #include <string>
 #define GET_SUBTARGETINFO_HEADER
 #include "AMDGPUGenSubtargetInfo.inc"
 #define MAX_CB_SIZE (1 << 16)
 namespace llvm {
  class Module;
  class AMDILKernelManager;
  class AMDILGlobalManager;
  class AMDILDevice;
  // Subtarget description for AMDIL: parses the CPU/feature strings, holds
  // the resulting settings and owns the matching AMDILDevice object.
  class AMDILSubtarget : public AMDILGenSubtargetInfo {
    private:
      // Per-capability override flags, indexed by AMDILDeviceInfo::Caps.
      bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
      // Manager pointers are mutable so the const setters below can store
      // them.
      mutable AMDILGlobalManager *mGM;
      mutable AMDILKernelManager *mKM;
      // Created in the constructor and deleted in the destructor.
      const AMDILDevice *mDevice;
      // Default size per dimension; exactly three entries.
      size_t mDefaultSize[3];
      std::string mDevName;
      // CAL version; (uint32_t)-1 when none was given in the feature string.
      uint32_t mVersion;
      bool mIs64bit;
      bool mIs32on64bit;
      bool mDumpCode;
    public:
      AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
      virtual ~AMDILSubtarget();
      // Returns the override flag for the given capability.
      bool isOverride(AMDILDeviceInfo::Caps) const;
      bool is64bit() const;
      // Helper functions to simplify if statements
      bool isTargetELF() const;
      AMDILGlobalManager* getGlobalManager() const;
      void setGlobalManager(AMDILGlobalManager *gm) const;
      AMDILKernelManager* getKernelManager() const;
      void setKernelManager(AMDILKernelManager *gm) const;
      const AMDILDevice* device() const;
      // Uses the device's layout when a device exists, otherwise a built-in
      // 32-bit-pointer layout.
      std::string getDataLayout() const;
      std::string getDeviceName() const;
      // Returns the default size for dimension dim.
      virtual size_t getDefaultSize(uint32_t dim) const;
      // Return the version of CAL that the backend should target.
      uint32_t calVersion() const;
      // ParseSubtargetFeatures - Parses features string setting specified
      // subtarget options.  Definition of function is
      // auto generated by tblgen.
      void
        ParseSubtargetFeatures(
            llvm::StringRef CPU,
            llvm::StringRef FS);
      bool dumpCode() const { return mDumpCode; }
  };
 } // end namespace llvm
 #endif // _AMDILSUBTARGET_H_
--- a/lib/Target/AMDGPU/AMDILTokenDesc.td
+++ b/lib/Target/AMDGPU/AMDILTokenDesc.td
@ -0,0 +1,120 @@
 //===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===--------------------------------------------------------------------===//
 include "AMDILEnumeratedTypes.td"
 // Each token is 32 bits as specified in section 2.1 of the IL spec.
 // ILToken is the common base class: it stores the initial 32-bit value n in
 // the field _bits, which derived classes then overwrite bit range by bit
 // range.
 class ILToken <bits<32> n> {
    field bits<32> _bits = n;
 }
 // Section 2.2.1 - IL Language Token
 // Bits 0-7 hold client_type; all other bits keep the 0 from ILToken<0>.
 class ILLang<bits<8> client_type> : ILToken<0> {
    let _bits{0-7} = client_type;
 }
 // Section 2.2.2 - IL Version Token
 // Bits 0-7: minor version, bits 8-15: major version, bits 16-23: shader
 // type value.  Bits 24-31 keep the 0 from ILToken<0>.
 class ILVersion<bits<8> minor_version, bits<8> major_version, ILShader shader_type>  : ILToken<0> {
    let _bits{0-7} = minor_version;
    let _bits{8-15} = major_version;
    let _bits{16-23} = shader_type.Value;
 }
 // Section 2.2.3 - IL Opcode Token
 // Bits 0-15: opcode value, bits 16-29: control, bit 30: sec_mod_pre,
 // bit 31: pri_mod_pre.
 class ILOpcode<ILOpCode opcode, bits<14> control, bit sec_mod_pre, bit pri_mod_pre> : ILToken<0> {
    let _bits{0-15} = opcode.Value;
    let _bits{16-29} = control;
    let _bits{30} = sec_mod_pre;
    let _bits{31} = pri_mod_pre;
 }
 // Section 2.2.4 - IL Destination Token
 // Bits 0-15: register number, 16-21: register type, 22: mod_pre,
 // 23-24: relative_address, 25: dimension, 26: immediate_pre, 31: extended.
 // Bits 27-30 are not assigned here and keep the 0 from ILToken<0>.
 class ILDst<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
    let _bits{0-15} = register_num.Value;
    let _bits{16-21} = register_type.Value;
    let _bits{22} = mod_pre;
    let _bits{23-24} = relative_address;
    let _bits{25} = dimension;
    let _bits{26} = immediate_pre;
    let _bits{31} = extended;
 }
 // Section 2.2.5 - IL Destination Modifier Token
 // Bits 0-7 hold the four 2-bit component write settings (x, y, z, w) and
 // bit 8 the clamp flag.  The shift_scale parameter is accepted but not
 // encoded: its assignment to bits 9-12 is commented out.
 class ILDstMod<ILModDstComp x, ILModDstComp y, ILModDstComp z, ILModDstComp w, bit clamp, ILShiftScale shift_scale> : ILToken<0> {
    let _bits{0-1} = x.Value;
    let _bits{2-3} = y.Value;
    let _bits{4-5} = z.Value;
    let _bits{6-7} = w.Value;
    let _bits{8} = clamp;
    //let _bits{9-12} = shift_scale;
 }
 // Section 2.2.6 - IL Source Token
 // Same bit assignment as ILDst: bits 0-15 register number, 16-21 register
 // type, 22 mod_pre, 23-24 relative_address, 25 dimension, 26 immediate_pre,
 // 31 extended.  Bits 27-30 keep the 0 from ILToken<0>.
 class ILSrc<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
    let _bits{0-15} = register_num.Value;
    let _bits{16-21} = register_type.Value;
    let _bits{22} = mod_pre;
    let _bits{23-24} = relative_address;
    let _bits{25} = dimension;
    let _bits{26} = immediate_pre;
    let _bits{31} = extended;
 }
 // Section 2.2.7 - IL Source Modifier Token
 // Bits 0-15 hold, per component (x, y, z, w), a 3-bit swizzle selector
 // followed by a 1-bit negate flag.  Bits 16-20 are the invert, bias, x2,
 // sign and abs flags, bits 21-23 the divComp value and bits 24-31 clamp.
 class ILSrcMod<ILComponentSelect swizzle_x, bit negate_x, ILComponentSelect swizzle_y, bit negate_y,
                ILComponentSelect swizzle_z, bit negate_z, ILComponentSelect swizzle_w, bit negate_w,
                bit invert, bit bias, bit x2, bit sign, bit abs, ILDivComp divComp,
                bits<8> clamp> : ILToken<0> {
    let _bits{0-2} = swizzle_x.Value;
    let _bits{3} = negate_x;
    let _bits{4-6} = swizzle_y.Value;
    let _bits{7} = negate_y;
    let _bits{8-10} = swizzle_z.Value;
    let _bits{11} = negate_z;
    let _bits{12-14} = swizzle_w.Value;
    let _bits{15} = negate_w;
    let _bits{16} = invert;
    let _bits{17} = bias;
    let _bits{18} = x2;
    let _bits{19} = sign;
    let _bits{20} = abs;
    let _bits{21-23} = divComp.Value;
    let _bits{24-31} = clamp;
 }
 // Section 2.2.8 - IL Relative Address Token
 // Bits 0-15: address register value, bit 16: loop_relative,
 // bits 17-19: component value.  Bits 20-31 keep the 0 from ILToken<0>.
 class ILRelAddr<AMDILReg address_register, bit loop_relative, ILAddressing component> : ILToken<0> {
    let _bits{0-15} = address_register.Value;
    let _bits{16} = loop_relative;
    let _bits{17-19} = component.Value;
 }
 // IL Literal Token
 // The whole 32-bit token is the literal value val.
 class ILLiteral<bits<32> val> : ILToken<0> {
    let _bits = val;
 }
 // All tokens required for a destination register.
 // Bundles the destination token and its modifier with the relative-address
 // token and the source token/modifier pair used for the relative register.
 class ILDstReg<ILDst Reg, ILDstMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
    ILDst       reg = Reg;
    ILDstMod    mod = Mod;
    ILRelAddr   rel = Rel;
    ILSrc       reg_rel = Reg_Rel;
    ILSrcMod    reg_rel_mod = Reg_Rel_Mod;
 }
 // All tokens required for a source register.
 // Bundles the source token and its modifier with the relative-address
 // token and the source token/modifier pair used for the relative register.
 class ILSrcReg<ILSrc Reg, ILSrcMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
    ILSrc       reg = Reg;
    ILSrcMod    mod = Mod;
    ILRelAddr   rel = Rel;
    ILSrc       reg_rel = Reg_Rel;
    ILSrcMod    reg_rel_mod = Reg_Rel_Mod;
 }
--- a/lib/Target/AMDGPU/AMDILUtilityFunctions.h
+++ b/lib/Target/AMDGPU/AMDILUtilityFunctions.h
@ -0,0 +1,75 @@
 //===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //==-----------------------------------------------------------------------===//
 //
 // This file provides helper macros for expanding case statements.
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDILUTILITYFUNCTIONS_H_
 #define AMDILUTILITYFUNCTIONS_H_
 // Macros that are used to help with switch statements for various data types.
 // Each one expands to one or more `case <Instr>_<suffix>:` labels and nothing
 // else, so the statement to execute has to follow the macro invocation.
 // However, these macros do not return anything, unlike the second set below.
 // Label for the _i32 variant of Instr.
 #define ExpandCaseTo32bitIntTypes(Instr)  \
 case Instr##_i32:
 // Labels for the _i32i8 and _i32i16 variants of Instr.
 #define ExpandCaseTo32bitIntTruncTypes(Instr)  \
 case Instr##_i32i8: \
 case Instr##_i32i16: 
 // Alias for ExpandCaseTo32bitIntTypes.
 #define ExpandCaseToIntTypes(Instr) \
    ExpandCaseTo32bitIntTypes(Instr)
 // Alias for ExpandCaseTo32bitIntTruncTypes.
 #define ExpandCaseToIntTruncTypes(Instr) \
    ExpandCaseTo32bitIntTruncTypes(Instr)
 // Label for the _f32 variant of Instr.
 #define ExpandCaseToFloatTypes(Instr) \
    case Instr##_f32:
 // Labels for the _i32 and _f32 variants of Instr.
 #define ExpandCaseTo32bitScalarTypes(Instr) \
    ExpandCaseTo32bitIntTypes(Instr) \
 case Instr##_f32:
 // Labels for the _f32 and _i32 variants of Instr.
 #define ExpandCaseToAllScalarTypes(Instr) \
    ExpandCaseToFloatTypes(Instr) \
 ExpandCaseToIntTypes(Instr)
 // NOTE(review): ExpandCaseToFloatTruncTypes is not defined anywhere in this
 // header, so this macro (and ExpandCaseToAllTruncTypes, which forwards to
 // it) only expands correctly if that name is provided elsewhere - confirm
 // before using.
 #define ExpandCaseToAllScalarTruncTypes(Instr) \
    ExpandCaseToFloatTruncTypes(Instr) \
 ExpandCaseToIntTruncTypes(Instr)
 // Alias for ExpandCaseToAllScalarTypes.
 #define ExpandCaseToAllTypes(Instr) \
 ExpandCaseToAllScalarTypes(Instr)
 // Alias for ExpandCaseToAllScalarTruncTypes.
 #define ExpandCaseToAllTruncTypes(Instr) \
 ExpandCaseToAllScalarTruncTypes(Instr)
 // Macros that expand into case statements with return values.
 // Each one expands to `case <Instr>_<suffix>: return <Return>_<suffix>;`
 // for the suffixes it covers, so it maps an Instr variant onto the Return
 // variant with the same type suffix.
 // Maps Instr##_i32 to Return##_i32.
 #define ExpandCaseTo32bitIntReturn(Instr, Return)  \
 case Instr##_i32: return Return##_i32;
 // Alias for ExpandCaseTo32bitIntReturn.
 #define ExpandCaseToIntReturn(Instr, Return) \
    ExpandCaseTo32bitIntReturn(Instr, Return)
 // Maps Instr##_f32 to Return##_f32.  The last line of the definition must
 // not end in a backslash, otherwise the following #define is spliced into
 // this macro's replacement list.
 #define ExpandCaseToFloatReturn(Instr, Return) \
    case Instr##_f32: return Return##_f32;
 // Maps both the _f32 and the _i32 variants.
 #define ExpandCaseToAllScalarReturn(Instr, Return) \
    ExpandCaseToFloatReturn(Instr, Return) \
 ExpandCaseToIntReturn(Instr, Return)
 // These macros expand to common groupings of RegClass ID's
 // Labels for the GPRI32 and GPRF32 register class IDs.
 #define ExpandCaseTo1CompRegID \
 case AMDGPU::GPRI32RegClassID: \
 case AMDGPU::GPRF32RegClassID:
 // Labels for the _i32 and _f32 variants of Instr.
 #define ExpandCaseTo32BitType(Instr) \
 case Instr##_i32: \
 case Instr##_f32:
 #endif // AMDILUTILITYFUNCTIONS_H_
--- a/lib/Target/AMDGPU/AMDILVersion.td
+++ b/lib/Target/AMDGPU/AMDILVersion.td
@ -0,0 +1,58 @@
 //===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===--------------------------------------------------------------------===//
 // Intrinsic operation support
 //===--------------------------------------------------------------------===//
 // Barrier and region-query intrinsics, all declared with the "AMDIL" target
 // prefix.  The four barrier intrinsics use the BinaryIntNoRetInt signature
 // class; the four region queries take no arguments and return a v4i32.
 let TargetPrefix = "AMDIL", isTarget = 1 in {
 def int_AMDIL_barrier   : GCCBuiltin<"barrier">,
        BinaryIntNoRetInt;
 def int_AMDIL_barrier_global   : GCCBuiltin<"barrierGlobal">,
      BinaryIntNoRetInt;
 def int_AMDIL_barrier_local   : GCCBuiltin<"barrierLocal">,
      BinaryIntNoRetInt;
 def int_AMDIL_barrier_region   : GCCBuiltin<"barrierRegion">,
      BinaryIntNoRetInt;
 def int_AMDIL_get_region_id : GCCBuiltin<"__amdil_get_region_id_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
 def int_AMDIL_get_region_local_id : GCCBuiltin<"__amdil_get_region_local_id_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
 def int_AMDIL_get_num_regions : GCCBuiltin<"__amdil_get_num_regions_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
 def int_AMDIL_get_region_size : GCCBuiltin<"__amdil_get_region_size_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
 }
 // Barrier instructions.  All are marked isCall and isNotDuplicable, take a
 // flag and an id operand in GPRI32 and produce no result; each one is
 // selected from the barrier intrinsic named in its pattern.
 let isCall=1, isNotDuplicable=1 in {
  // int_AMDIL_barrier has two encodings, chosen by predicate: the
  // hasRegionAS form fences LDS, GDS and GWS ...
  let Predicates=[hasRegionAS] in {
 def BARRIER_EGNI : BinaryOpNoRet<IL_OP_BARRIER, (outs),
      (ins GPRI32:$flag, GPRI32:$id),
      "fence_threads_memory_lds_gds_gws",
      [(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
 }
 // ... while the noRegionAS form only names memory and LDS.
 let Predicates=[noRegionAS] in {
 def BARRIER_7XX : BinaryOpNoRet<IL_OP_BARRIER, (outs),
      (ins GPRI32:$flag, GPRI32:$id),
      "fence_threads_memory_lds",
      [(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
 }
 def BARRIER_LOCAL : BinaryOpNoRet<IL_OP_BARRIER_LOCAL, (outs),
      (ins GPRI32:$flag, GPRI32:$id),
      "fence_threads_lds",
      [(int_AMDIL_barrier_local GPRI32:$flag, GPRI32:$id)]>;
 def BARRIER_GLOBAL : BinaryOpNoRet<IL_OP_BARRIER_GLOBAL, (outs),
      (ins GPRI32:$flag, GPRI32:$id),
      "fence_threads_memory",
      [(int_AMDIL_barrier_global GPRI32:$flag, GPRI32:$id)]>;
 def BARRIER_REGION : BinaryOpNoRet<IL_OP_BARRIER_REGION, (outs),
    (ins GPRI32:$flag, GPRI32:$id),
    "fence_threads_gds",
    [(int_AMDIL_barrier_region GPRI32:$flag, GPRI32:$id)]>;
 }
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@ -0,0 +1,50 @@
 # AMDGPU.td is the TableGen input for every generated .inc file below.
 set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
 tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
 tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
 tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
 tablegen(LLVM AMDGPUGenCodeEmitter.inc -gen-emitter)
 tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
 # Target that the sub-libraries depend on to get the generated files.
 add_public_tablegen_target(AMDGPUCommonTableGen)
 # Sources of the code generator library: AMDIL*, AMDGPU*, R600* and SI*.
 add_llvm_target(AMDGPUCodeGen
  AMDIL7XXDevice.cpp
  AMDILCFGStructurizer.cpp
  AMDILDevice.cpp
  AMDILDeviceInfo.cpp
  AMDILEvergreenDevice.cpp
  AMDILFrameLowering.cpp
  AMDILInstrInfo.cpp
  AMDILIntrinsicInfo.cpp
  AMDILISelDAGToDAG.cpp
  AMDILISelLowering.cpp
  AMDILNIDevice.cpp
  AMDILPeepholeOptimizer.cpp
  AMDILRegisterInfo.cpp
  AMDILSIDevice.cpp
  AMDILSubtarget.cpp
  AMDGPUTargetMachine.cpp
  AMDGPUISelLowering.cpp
  AMDGPUConvertToISA.cpp
  AMDGPUInstrInfo.cpp
  AMDGPURegisterInfo.cpp
  AMDGPUUtil.cpp
  R600CodeEmitter.cpp
  R600InstrInfo.cpp
  R600ISelLowering.cpp
  R600KernelParameters.cpp
  R600MachineFunctionInfo.cpp
  R600RegisterInfo.cpp
  SIAssignInterpRegs.cpp
  SICodeEmitter.cpp
  SIInstrInfo.cpp
  SIISelLowering.cpp
  SIMachineFunctionInfo.cpp
  SIRegisterInfo.cpp
  )
 # Sub-libraries built from their own directories.
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
--- a/lib/Target/AMDGPU/GENERATED_FILES
+++ b/lib/Target/AMDGPU/GENERATED_FILES
@ -0,0 +1,13 @@
 There are 3 files used by this backend that are generated by perl scripts:
 - R600RegisterInfo.td
  + Generated with:
    perl R600GenRegisterInfo.pl > R600RegisterInfo.td
 - R600HwRegInfo.include
  + Generated with:
    perl R600GenRegisterInfo.pl
 - SIRegisterInfo.td
  + Generated with:
    perl SIGenRegisterInfo.pl > SIRegisterInfo.td
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@ -0,0 +1,32 @@
 ;===- ./lib/Target/AMDGPU/LLVMBuild.txt ------------------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
 ; This file is distributed under the University of Illinois Open Source
 ; License. See LICENSE.TXT for details.
 ;
 ;===------------------------------------------------------------------------===;
 ;
 ; This is an LLVMBuild description file for the components in this subdirectory.
 ;
 ; For more information on the LLVMBuild system, please see:
 ;
 ;   http://llvm.org/docs/LLVMBuild.html
 ;
 ;===------------------------------------------------------------------------===;
 [common]
 subdirectories = MCTargetDesc TargetInfo
 [component_0]
 type = TargetGroup
 name = AMDGPU
 parent = Target
 has_asmprinter = 0
 [component_1]
 type = Library
 name = AMDGPUCodeGen
 parent = AMDGPU
 required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDGPUInfo AMDGPUDesc
 add_to_library_groups = AMDGPU
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@ -0,0 +1,104 @@
 //===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - AMDGPU asm properties ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
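 // This file implements AMDGPUMCAsmInfo, the MCAsmInfo subclass that sets the
 // assembly syntax properties (directives, prefixes, comment strings) used for
 // the AMDGPU target.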
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUMCAsmInfo.h"
 #ifndef NULL
 #define NULL 0
 #endif
 using namespace llvm;
 // Fills in the MCAsmInfo fields for this target.  Neither T nor TT is read;
 // every setting below is a fixed value.
 AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
 {
  //===------------------------------------------------------------------===//
  HasSubsectionsViaSymbols = true;
  HasMachoZeroFillDirective = false;
  HasMachoTBSSDirective = false;
  HasStaticCtorDtorReferenceInStaticMode = false;
  LinkerRequiresNonEmptyDwarfLines = true;
  MaxInstLength = 16;
  PCSymbol = "$";
  SeparatorString = "\n";
  CommentColumn = 40;
  CommentString = ";";
  LabelSuffix = ":";
  GlobalPrefix = "@";
  // Note that the private prefix starts with the comment string ";".
  PrivateGlobalPrefix = ";.";
  LinkerPrivateGlobalPrefix = "!";
  // Both inline-asm markers also start with the comment string.
  InlineAsmStart = ";#ASMSTART";
  InlineAsmEnd = ";#ASMEND";
  AssemblerDialect = 0;
  AllowQuotesInName = false;
  AllowNameToStartWithDigit = false;
  AllowPeriodsInName = false;
  //===--- Data Emission Directives -------------------------------------===//
  ZeroDirective = ".zero";
  AsciiDirective = ".ascii\t";
  AscizDirective = ".asciz\t";
  Data8bitsDirective = ".byte\t";
  Data16bitsDirective = ".short\t";
  Data32bitsDirective = ".long\t";
  Data64bitsDirective = ".quad\t";
  GPRel32Directive = NULL;
  SunStyleELFSectionSwitchSyntax = true;
  UsesELFSectionDirectiveForBSS = true;
  HasMicrosoftFastStdCallMangling = false;
  //===--- Alignment Information ----------------------------------------===//
  AlignDirective = ".align\t";
  AlignmentIsInBytes = true;
  TextAlignFillValue = 0;
  //===--- Global Variable Emission Directives --------------------------===//
  GlobalDirective = ".global";
  ExternDirective = ".extern";
  HasSetDirective = false;
  HasAggressiveSymbolFolding = true;
  LCOMMDirectiveType = LCOMM::None;
  COMMDirectiveAlignmentIsInBytes = false;
  HasDotTypeDotSizeDirective = false;
  HasSingleParameterDotFile = true;
  HasNoDeadStrip = true;
  HasSymbolResolver = false;
  WeakRefDirective = ".weakref\t";
  WeakDefDirective = ".weakdef\t";
  LinkOnceDirective = NULL;
  HiddenVisibilityAttr = MCSA_Hidden;
  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
  ProtectedVisibilityAttr = MCSA_Protected;
  //===--- Dwarf Emission Directives -----------------------------------===//
  HasLEB128 = true;
  SupportsDebugInformation = true;
  ExceptionsType = ExceptionHandling::None;
  DwarfUsesInlineInfoSection = false;
  DwarfSectionOffsetDirective = ".offset";
 }
 // Returns the data directive to use for the given size and address space.
 // No address space has a dedicated directive: the result is NULL for every
 // input, so neither argument influences it.
 const char *AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size,
                                                 unsigned int AS) const {
  (void)Size;
  (void)AS;
  return NULL;
 }
 // No non-executable-stack section is provided: always returns NULL.
 const MCSection *AMDGPUMCAsmInfo::getNonexecutableStackSection(
     MCContext &CTX) const {
  return NULL;
 }
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@ -0,0 +1,30 @@
 //===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU asm properties ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
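 // This file declares AMDGPUMCAsmInfo, the MCAsmInfo subclass that describes
 // the assembly syntax properties of the AMDGPU target.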
 //
 //===----------------------------------------------------------------------===//
 #ifndef AMDGPUMCASMINFO_H_
 #define AMDGPUMCASMINFO_H_
 #include "llvm/MC/MCAsmInfo.h"
 namespace llvm {
  class Target;
  class StringRef;
  // Assembly syntax description for the AMDGPU target.
  class AMDGPUMCAsmInfo : public MCAsmInfo {
    public:
      // Sets every MCAsmInfo field to a fixed value; the arguments are not
      // inspected.
      explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
      // Returns NULL for every size / address space combination.
      const char*
        getDataASDirective(unsigned int Size, unsigned int AS) const;
      // Returns NULL.
      const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
  };
 } // namespace llvm
 #endif // AMDGPUMCASMINFO_H_
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@ -0,0 +1,61 @@
 #include "AMDGPUMCTargetDesc.h"
 #include "AMDGPUMCAsmInfo.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #define GET_INSTRINFO_MC_DESC
 #include "AMDGPUGenInstrInfo.inc"
 #define GET_SUBTARGETINFO_MC_DESC
 #include "AMDGPUGenSubtargetInfo.inc"
 #define GET_REGINFO_MC_DESC
 #include "AMDGPUGenRegisterInfo.inc"
 using namespace llvm;
 // Allocates an MCInstrInfo and populates it through InitAMDILMCInstrInfo().
 // The caller receives the raw pointer.
 static MCInstrInfo *createAMDGPUMCInstrInfo() {
  MCInstrInfo *InstrInfo = new MCInstrInfo();
  InitAMDILMCInstrInfo(InstrInfo);
  return InstrInfo;
 }
 // Allocates an MCRegisterInfo and populates it through
 // InitAMDILMCRegisterInfo() with 0 as the second argument.  The triple TT is
 // not used.
 static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  InitAMDILMCRegisterInfo(RegInfo, 0);
  return RegInfo;
 }
 // Allocates an MCSubtargetInfo and populates it through
 // InitAMDILMCSubtargetInfo() from the triple, CPU name and feature string.
 static MCSubtargetInfo *
 createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
  MCSubtargetInfo *SubtargetInfo = new MCSubtargetInfo();
  InitAMDILMCSubtargetInfo(SubtargetInfo, TT, CPU, FS);
  return SubtargetInfo;
 }
 // Allocates an MCCodeGenInfo and initializes it with the requested
 // relocation model, code model and optimization level.  The triple TT is
 // not used.
 static MCCodeGenInfo *
 createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM,
                           CodeGenOpt::Level OL) {
  MCCodeGenInfo *CodeGenInfo = new MCCodeGenInfo();
  CodeGenInfo->InitMCCodeGenInfo(RM, CM, OL);
  return CodeGenInfo;
 }
 extern "C" void LLVMInitializeAMDGPUTargetMC() {
  RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
  TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
  TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
  TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
  TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
 }
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@ -0,0 +1,35 @@
 //===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file provides AMDGPU specific target descriptions.
 //
 //===----------------------------------------------------------------------===//
 //
 #ifndef AMDGPUMCTARGETDESC_H
 #define AMDGPUMCTARGETDESC_H
 namespace llvm {
 class MCSubtargetInfo;
 class Target;
 // The Target object for AMDGPU; only declared here, defined elsewhere.
 extern Target TheAMDGPUTarget;
 } // End llvm namespace
 #define GET_REGINFO_ENUM
 #include "AMDGPUGenRegisterInfo.inc"
 #define GET_INSTRINFO_ENUM
 #include "AMDGPUGenInstrInfo.inc"
 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
 #endif // AMDGPUMCTARGETDESC_H
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@ -0,0 +1,7 @@
 # MC description library for the AMDGPU target.
 add_llvm_library(LLVMAMDGPUDesc
  AMDGPUMCTargetDesc.cpp
  AMDGPUMCAsmInfo.cpp
  )
 # The sources include TableGen-generated .inc files, so build those first.
 add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen)
--- a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
@ -0,0 +1,23 @@
 ;===- ./lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
 ; This file is distributed under the University of Illinois Open Source
 ; License. See LICENSE.TXT for details.
 ;
 ;===------------------------------------------------------------------------===;
 ;
 ; This is an LLVMBuild description file for the components in this subdirectory.
 ;
 ; For more information on the LLVMBuild system, please see:
 ;
 ;   http://llvm.org/docs/LLVMBuild.html
 ;
 ;===------------------------------------------------------------------------===;
 [component_0]
 type = Library
 name = AMDGPUDesc
 parent = AMDGPU
 required_libraries = AMDGPUInfo MC
 add_to_library_groups = AMDGPU
--- a/lib/Target/AMDGPU/MCTargetDesc/Makefile
+++ b/lib/Target/AMDGPU/MCTargetDesc/Makefile
@ -0,0 +1,16 @@
 ##===- lib/Target/AMDGPU/MCTargetDesc/Makefile --------------*- Makefile -*-===##
 #
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 #
 ##===----------------------------------------------------------------------===##
 LEVEL = ../../../..
 LIBRARYNAME = LLVMAMDGPUDesc
 # Hack: we need to include 'main' target directory to grab private headers
 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 include $(LEVEL)/Makefile.common
--- a/lib/Target/AMDGPU/Makefile
+++ b/lib/Target/AMDGPU/Makefile
@ -0,0 +1,22 @@
 ##===- lib/Target/AMDGPU/Makefile ---------------------------*- Makefile -*-===##
 #
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 #
 ##===----------------------------------------------------------------------===##
 # Path from this directory to the directory that holds Makefile.common.
 LEVEL = ../../..
 LIBRARYNAME = LLVMAMDGPUCodeGen
 TARGET = AMDGPU
 # Make sure that tblgen is run, first thing.
 # The final entry of the list must NOT end with a backslash: a trailing
 # continuation would fold the DIRS assignment below into BUILT_SOURCES.
 BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
 		AMDGPUGenDAGISel.inc  AMDGPUGenSubtargetInfo.inc \
 		AMDGPUGenCodeEmitter.inc AMDGPUGenCallingConv.inc \
 		AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc
 # Sub-directories that are built as part of this target.
 DIRS = TargetInfo MCTargetDesc
 include $(LEVEL)/Makefile.common
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@ -0,0 +1,27 @@
 //===-- Processors.td - AMDGPU processor definitions ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // AMDIL processors supported.
 //
 //===----------------------------------------------------------------------===//
 // Proc - Thin wrapper that forwards its name, itinerary and feature list
 // unchanged to the generic Processor class.
 class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
 : Processor<Name, itin, Features>;
 // Processors that use the R600_EG_Itin itinerary.  The last argument lists
 // the subtarget features enabled for that processor.
 def : Proc<"rv710",      R600_EG_Itin, []>;
 def : Proc<"rv730",      R600_EG_Itin, []>;
 def : Proc<"rv770",      R600_EG_Itin, [FeatureFP64]>;
 def : Proc<"cedar",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"redwood",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"juniper",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"cypress",    R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
 def : Proc<"barts",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"turks",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"caicos",     R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"cayman",     R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
 // The only processor that uses the SI_Itin itinerary; no features enabled.
 def : Proc<"SI", SI_Itin, []>;
--- a/lib/Target/AMDGPU/R600CodeEmitter.cpp
+++ b/lib/Target/AMDGPU/R600CodeEmitter.cpp
@ -0,0 +1,614 @@
 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This code emitter outputs bytecode that is understood by the r600g driver
 // in the Mesa [1] project.  The bytecode is very similar to the hardware's ISA,
 // except that the sizes of the instruction fields are rounded up to the
 // nearest byte.
 //
 // [1] http://www.mesa3d.org/
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "AMDGPUUtil.h"
 #include "AMDILCodeEmitter.h"
 #include "AMDILInstrInfo.h"
 #include "AMDILUtilityFunctions.h"
 #include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetMachine.h"
 #include <stdio.h>
 #define SRC_BYTE_COUNT 11
 #define DST_BYTE_COUNT 5
 using namespace llvm;
 namespace {
 /// R600CodeEmitter - Machine function pass that serializes every machine
 /// instruction of a function as a sequence of bytes written to a
 /// formatted_raw_ostream.
 class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
 private:
  static char ID;
  /// Stream that receives every emitted byte.
  formatted_raw_ostream &_OS;
  /// Target objects; assigned at the start of runOnMachineFunction().
  const TargetMachine * TM;
  const MachineRegisterInfo * MRI;
  const R600RegisterInfo * TRI;
  /// Classification of the instruction currently being emitted; refreshed
  /// for every instruction by runOnMachineFunction().
  bool IsCube;
  bool IsReduction;
  bool IsVector;
  /// Slot (0-3) being emitted while an instruction is expanded four times.
  unsigned currentElement;
  /// Value written to the "last" field of the next ALU instruction.
  bool IsLast;
 public:
  /// \param OS stream that the encoded instructions are written to.
  /// All pointer members and currentElement are given defined values here so
  /// that no member is ever read while indeterminate.
  R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
      _OS(OS), TM(NULL), MRI(NULL), TRI(NULL), IsCube(false),
      IsReduction(false), IsVector(false), currentElement(0), IsLast(true) { }
  const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
  /// Emit every instruction of \p MF; always returns false.
  bool runOnMachineFunction(MachineFunction &MF);
  /// \returns the hardware register index for register operands and the
  /// immediate value otherwise.
  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
                                     const MachineOperand &MO) const;
 private:
  // Instruction-level emitters.
  void EmitALUInstr(MachineInstr  &MI);
  void EmitSrc(const MachineOperand & MO, int chan_override  = -1);
  void EmitDst(const MachineOperand & MO);
  void EmitALU(MachineInstr &MI, unsigned numSrc);
  void EmitTexInstr(MachineInstr &MI);
  void EmitFCInstr(MachineInstr &MI);
  // Raw byte writers; multi-byte values are written low byte first.
  void EmitNullBytes(unsigned int byteCount);
  void EmitByte(unsigned int byte);
  void EmitTwoBytes(uint32_t bytes);
  void Emit(uint32_t value);
  void Emit(uint64_t value);
  /// \returns the encoding value of \p regNo, offset by 512 for registers in
  /// R600_CReg32.
  unsigned getHWReg(unsigned regNo) const;
 };
 } // End anonymous namespace
 /// Symbolic indices for the X, Y, Z and W elements; used to index the
 /// four-entry coordinate tables in EmitTexInstr().
 enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y = 1,
  ELEMENT_Z = 2,
  ELEMENT_W = 3
 };
 /// Instruction type tags.  The tag is the first byte written for each
 /// encoded instruction.
 enum InstrTypes {
  INSTR_ALU    = 0,
  INSTR_TEX    = 1,
  INSTR_FC     = 2,
  INSTR_NATIVE = 3,
  INSTR_VTX    = 4
 };
 /// Flow-control instruction codes; EmitFCInstr() writes one of these as the
 /// final byte of a flow-control instruction.
 enum FCInstr {
  FC_IF           = 0,
  FC_ELSE         = 1,
  FC_ENDIF        = 2,
  FC_BGNLOOP      = 3,
  FC_ENDLOOP      = 4,
  FC_BREAK        = 5,
  FC_BREAK_NZ_INT = 6,
  FC_CONTINUE     = 7,
  FC_BREAK_Z_INT  = 8
 };
 /// Texture type codes; EmitTexInstr() compares the texture-type immediate
 /// operand of a texture instruction against these values.  Numbering starts
 /// at 1.
 enum TextureTypes {
  TEXTURE_1D             = 1,
  TEXTURE_2D             = 2,
  TEXTURE_3D             = 3,
  TEXTURE_CUBE           = 4,
  TEXTURE_RECT           = 5,
  TEXTURE_SHADOW1D       = 6,
  TEXTURE_SHADOW2D       = 7,
  TEXTURE_SHADOWRECT     = 8,
  TEXTURE_1D_ARRAY       = 9,
  TEXTURE_2D_ARRAY       = 10,
  TEXTURE_SHADOW1D_ARRAY = 11,
  TEXTURE_SHADOW2D_ARRAY = 12
 };
 // Storage for the pass ID passed to the MachineFunctionPass constructor.
 char R600CodeEmitter::ID = 0;
 /// createR600CodeEmitterPass - Allocate a new R600CodeEmitter that writes to
 /// \p OS.  The pass is returned as a raw pointer allocated with new.
 FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
  return new R600CodeEmitter(OS);
 }
 /// runOnMachineFunction - Emit every instruction of \p MF to the output
 /// stream.
 ///
 /// Each instruction is dispatched to the texture, flow-control, ALU, native
 /// (RAT write) or vertex-fetch encoder.  Reduction, vector and cube
 /// instructions are emitted four times, once per element.  Instructions
 /// whose first operand is a dead register, as well as RETURN, BUNDLE and
 /// KILL, produce no output.
 ///
 /// \returns false always.
 bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
  TM = &MF.getTarget();
  MRI = &MF.getRegInfo();
  TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
  const R600InstrInfo * TII =
      static_cast<const R600InstrInfo *>(TM->getInstrInfo());
  const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
  if (STM.dumpCode()) {
    MF.dump();
  }
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;
      const unsigned Opcode = MI.getOpcode();
      IsReduction = AMDGPU::isReductionOp(Opcode);
      IsVector = TII->isVector(MI);
      IsCube = AMDGPU::isCubeOp(Opcode);
      // Skip instructions whose result register is dead.
      if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() &&
          MI.getOperand(0).isDead()) {
        continue;
      }
      if (AMDGPU::isTexOp(Opcode)) {
        EmitTexInstr(MI);
      } else if (AMDGPU::isFCOp(Opcode)) {
        EmitFCInstr(MI);
      } else if (IsReduction || IsVector || IsCube) {
        IsLast = false;
        // XXX: On Cayman, some (all?) of the vector instructions only need
        // to fill the first three slots.
        for (currentElement = 0; currentElement < 4; currentElement++) {
          IsLast = (currentElement == 3);
          EmitALUInstr(MI);
        }
        IsReduction = false;
        IsVector = false;
        IsCube = false;
      } else if (Opcode == AMDGPU::RETURN ||
                 Opcode == AMDGPU::BUNDLE ||
                 Opcode == AMDGPU::KILL) {
        continue;
      } else {
        switch (Opcode) {
        case AMDGPU::RAT_WRITE_CACHELESS_eg:
          {
            uint64_t inst = getBinaryCodeForInstr(MI);
            // Set End Of Program bit
            // XXX: Need better check of end of program.  EOP should be
            // encoded in one of the operands of the MI, and it should be
            // set in a prior pass.
            MachineBasicBlock::iterator NextI = llvm::next(I);
            // The write may be the last instruction of its block, so the
            // successor iterator must be checked before it is dereferenced.
            if (NextI != E && NextI->getOpcode() == AMDGPU::RETURN) {
              inst |= (((uint64_t)1) << 53);
            }
            EmitByte(INSTR_NATIVE);
            Emit(inst);
            break;
          }
        case AMDGPU::VTX_READ_PARAM_i32_eg:
        case AMDGPU::VTX_READ_PARAM_f32_eg:
        case AMDGPU::VTX_READ_GLOBAL_i32_eg:
        case AMDGPU::VTX_READ_GLOBAL_f32_eg:
        case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
        case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
          {
            // Words 0 and 1 come from the generated encoder; word 2 carries
            // the offset operand.
            uint64_t InstWord01 = getBinaryCodeForInstr(MI);
            uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
            EmitByte(INSTR_VTX);
            Emit(InstWord01);
            Emit(InstWord2);
            break;
          }
        default:
          EmitALUInstr(MI);
          break;
        }
      }
    }
  }
  return false;
 }
 /// EmitALUInstr - Emit one ALU instruction: the type byte, three source
 /// fields (unused ones zero-filled), the destination field and the ALU
 /// control fields.  Place-holder opcodes and instructions without explicit
 /// operands produce no output.
 void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
 {
  const unsigned numOperands = MI.getNumExplicitOperands();
  // Some instructions are just place holder instructions that represent
  // operations that the GPU does automatically.  They should be ignored.
  if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
    return;
  }
  // XXX Check if instruction writes a result
  if (numOperands < 1) {
    return;
  }
  // Bind by reference; the operand does not need to be copied.
  const MachineOperand &dstOp = MI.getOperand(0);
  // Emit instruction type
  EmitByte(INSTR_ALU);
  if (IsCube) {
    // Both sources read operand 1; only the channel differs and depends on
    // the element currently being emitted.
    static const int cube_src_swz[] = {2, 2, 0, 1};
    EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
    EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
    EmitNullBytes(SRC_BYTE_COUNT);
  } else {
    unsigned int opIndex;
    for (opIndex = 1; opIndex < numOperands; opIndex++) {
      const MachineOperand &srcOp = MI.getOperand(opIndex);
      // Literal constants are always stored as the last operand.
      if (srcOp.isImm() || srcOp.isFPImm()) {
        break;
      }
      EmitSrc(srcOp);
    }
    // Emit zeros for unused sources
    for ( ; opIndex < 4; opIndex++) {
      EmitNullBytes(SRC_BYTE_COUNT);
    }
  }
  EmitDst(dstOp);
  // NOTE(review): the count passed here includes a trailing literal operand,
  // if any -- confirm that this is what the isOp3 field expects.
  EmitALU(MI, numOperands - 1);
 }
 /// EmitSrc - Emit the SRC_BYTE_COUNT-byte description of one source operand:
 /// source select (2 bytes), channel, isNegated, isAbsolute, relative
 /// addressing mode and kc_bank (1 byte each), followed by the 4-byte literal
 /// value.
 ///
 /// \param MO            operand to describe.
 /// \param chan_override channel to emit, or -1 to derive it from the operand.
 void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
 {
  uint32_t value = 0;
  const bool isReg = MO.isReg();
  // Emit the source select (2 bytes).  For GPRs, this is the register index.
  // For other potential instruction operands, (e.g. constant registers) the
  // value of the source select is defined in the r600isa docs.
  if (isReg) {
    const unsigned reg = MO.getReg();
    EmitTwoBytes(getHWReg(reg));
    if (reg == AMDGPU::ALU_LITERAL_X) {
      // The literal value is the last explicit operand of the parent
      // instruction.  Bind it by reference instead of copying it.
      const MachineInstr * parent = MO.getParent();
      const MachineOperand &immOp =
          parent->getOperand(parent->getNumExplicitOperands() - 1);
      if (immOp.isFPImm()) {
        value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
      } else {
        assert(immOp.isImm());
        value = immOp.getImm();
      }
    }
  } else {
    // XXX: Handle other operand types.
    EmitTwoBytes(0);
  }
  // Emit the source channel (1 byte)
  if (chan_override != -1) {
    EmitByte(chan_override);
  } else if (IsReduction) {
    EmitByte(currentElement);
  } else if (isReg) {
    EmitByte(TRI->getHWRegChan(MO.getReg()));
  } else {
    EmitByte(0);
  }
  const unsigned flags = MO.getTargetFlags();
  const bool isAbs = (flags & MO_FLAG_ABS) != 0;
  // NEG_ONE and NEG_HALF are treated as negated sources.
  const bool isNegConst = isReg && (MO.getReg() == AMDGPU::NEG_ONE ||
                                    MO.getReg() == AMDGPU::NEG_HALF);
  // XXX: Emit isNegated (1 byte).  An absolute-value source is never
  // reported as negated.
  if (!isAbs && ((flags & MO_FLAG_NEG) != 0 || isNegConst)) {
    EmitByte(1);
  } else {
    EmitByte(0);
  }
  // Emit isAbsolute (1 byte)
  EmitByte(isAbs ? 1 : 0);
  // XXX: Emit relative addressing mode (1 byte)
  EmitByte(0);
  // Emit kc_bank, This will be adjusted later by r600_asm
  EmitByte(0);
  // Emit the literal value, if applicable (4 bytes).
  Emit(value);
 }
 /// EmitDst - Emit the DST_BYTE_COUNT-byte description of the destination
 /// operand: register index, element, isClamped, writemask and relative
 /// addressing mode (1 byte each).  Non-register operands are emitted as
 /// zeros.
 void R600CodeEmitter::EmitDst(const MachineOperand & MO)
 {
  if (!MO.isReg()) {
    // XXX: Handle other operand types.  Are there any for destination regs?
    EmitNullBytes(DST_BYTE_COUNT);
    return;
  }
  const unsigned dstReg = MO.getReg();
  // Destination register index (1 byte)
  EmitByte(getHWReg(dstReg));
  // Element of the destination register (1 byte).  Instructions that are
  // expanded per element write the element currently being emitted.
  const bool perElement = IsReduction || IsCube || IsVector;
  EmitByte(perElement ? currentElement : TRI->getHWRegChan(dstReg));
  // isClamped (1 byte)
  EmitByte((MO.getTargetFlags() & MO_FLAG_CLAMP) ? 1 : 0);
  // Writemask (1 byte).  The write is disabled when a reduction/vector
  // instruction is emitting an element other than the register's own
  // channel, or when the operand carries MO_FLAG_MASK.
  const bool otherElement = (IsReduction || IsVector) &&
                            currentElement != TRI->getHWRegChan(dstReg);
  const bool writeDisabled = otherElement ||
                             (MO.getTargetFlags() & MO_FLAG_MASK);
  EmitByte(writeDisabled ? 0 : 1);
  // XXX: Emit relative addressing mode
  EmitByte(0);
 }
 /// EmitALU - Emit the ALU control fields of an instruction: the opcode
 /// (2 bytes), the IsLast and isOp3 flags (1 byte each) and five one-byte
 /// fields that are always written as zero.
 ///
 /// \param numSrc operand count used to decide the isOp3 flag.
 void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
 {
  // Instruction opcode (2 bytes)
  EmitTwoBytes(getBinaryCodeForInstr(MI));
  // IsLast flag for this instruction group (1 byte)
  EmitByte(IsLast ? 1 : 0);
  // isOp3 flag (1 byte)
  EmitByte(numSrc == 3 ? 1 : 0);
  // The remaining fields are not implemented and are emitted as zero, one
  // byte each, in this order:
  //   XXX: predicate
  //   XXX: bank swizzle.  Do we need this?  It looks like r600_asm.c sets it.
  //   XXX: bank_swizzle_force.  Not sure what this is for.
  //   XXX: OMOD.  Not implemented.
  //   XXX: index_mode.  I think this is for indirect addressing, so we
  //        don't need to worry about it.
  EmitNullBytes(5);
 }
 /// EmitTexInstr - Emit one texture instruction as a fixed sequence of
 /// one-byte fields: type, opcode, resource id, source register and its
 /// relative flag, destination register and its relative flag, four
 /// destination selects, lod bias, four coordinate types, three offsets,
 /// sampler id and four source selects.
 ///
 /// For TEX_LD the three offsets are read from operands 2-4 and the sampler
 /// and texture-type operands are shifted by three positions.
 void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
 {
  const unsigned opcode = MI.getOpcode();
  const bool hasOffsets = (opcode == AMDGPU::TEX_LD);
  const unsigned op_offset = hasOffsets ? 3 : 0;
  const int64_t sampler = MI.getOperand(op_offset+2).getImm();
  const int64_t textureType = MI.getOperand(op_offset+3).getImm();
  unsigned srcSelect[4] = {0, 1, 2, 3};
  // Emit instruction type
  EmitByte(INSTR_TEX);
  // Emit instruction
  EmitByte(getBinaryCodeForInstr(MI));
  // XXX: Emit resource id r600_shader.c uses sampler + 1.  Why?
  // NOTE(review): the code adds 2, not 1 -- confirm which one is intended.
  EmitByte(sampler + 1 + 1);
  // Emit source register
  EmitByte(getHWReg(MI.getOperand(1).getReg()));
  // XXX: Emit src isRelativeAddress
  EmitByte(0);
  // Emit destination register
  EmitByte(getHWReg(MI.getOperand(0).getReg()));
  // XXX: Emit dst isRelativeAddress
  EmitByte(0);
  // XXX: Emit dst select
  EmitByte(0); // X
  EmitByte(1); // Y
  EmitByte(2); // Z
  EmitByte(3); // W
  // XXX: Emit lod bias
  EmitByte(0);
  // XXX: Emit coord types
  unsigned coordType[4] = {1, 1, 1, 1};
  if (textureType == TEXTURE_RECT
      || textureType == TEXTURE_SHADOWRECT) {
    coordType[ELEMENT_X] = 0;
    coordType[ELEMENT_Y] = 0;
  }
  if (textureType == TEXTURE_1D_ARRAY
      || textureType == TEXTURE_SHADOW1D_ARRAY) {
    if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
      coordType[ELEMENT_Y] = 0;
    } else {
      coordType[ELEMENT_Z] = 0;
      srcSelect[ELEMENT_Z] = ELEMENT_Y;
    }
  } else if (textureType == TEXTURE_2D_ARRAY
             || textureType == TEXTURE_SHADOW2D_ARRAY) {
    coordType[ELEMENT_Z] = 0;
  }
  for (unsigned i = 0; i < 4; i++) {
    EmitByte(coordType[i]);
  }
  // XXX: Emit offsets
  if (hasOffsets) {
    // Each offset immediate is shifted left by one bit before it is emitted.
    for (unsigned i = 2; i < 5; i++) {
      EmitByte(MI.getOperand(i).getImm()<<1);
    }
  } else {
    EmitNullBytes(3);
  }
  // Emit sampler id
  EmitByte(sampler);
  // XXX:Emit source select
  if ((textureType == TEXTURE_SHADOW1D
      || textureType == TEXTURE_SHADOW2D
      || textureType == TEXTURE_SHADOWRECT
      || textureType == TEXTURE_SHADOW1D_ARRAY)
      && opcode != AMDGPU::TEX_SAMPLE_C_L
      && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
    srcSelect[ELEMENT_W] = ELEMENT_Z;
  }
  for (unsigned i = 0; i < 4; i++) {
    EmitByte(srcSelect[i]);
  }
 }
 /// EmitFCInstr - Emit one flow-control instruction: the type byte, one
 /// source field (zero-filled when the instruction has no operand) and the
 /// flow-control code.  Aborts on IF_LOGICALZ_f32 and on any opcode that is
 /// not listed below.
 void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
 {
  // Instruction type
  EmitByte(INSTR_FC);
  // Source field: flow-control instructions carry at most one operand.
  const unsigned operandCount = MI.getNumOperands();
  if (operandCount == 0) {
    EmitNullBytes(SRC_BYTE_COUNT);
  } else {
    assert(operandCount == 1);
    EmitSrc(MI.getOperand(0));
  }
  // Translate the opcode into its flow-control code.
  enum FCInstr fcCode;
  switch (MI.getOpcode()) {
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
    fcCode = FC_IF;
    break;
  case AMDGPU::ELSE:
    fcCode = FC_ELSE;
    break;
  case AMDGPU::ENDIF:
    fcCode = FC_ENDIF;
    break;
  case AMDGPU::WHILELOOP:
    fcCode = FC_BGNLOOP;
    break;
  case AMDGPU::ENDLOOP:
    fcCode = FC_ENDLOOP;
    break;
  case AMDGPU::BREAK_LOGICALZ_f32:
    fcCode = FC_BREAK;
    break;
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
    fcCode = FC_BREAK_NZ_INT;
    break;
  case AMDGPU::BREAK_LOGICALZ_i32:
    fcCode = FC_BREAK_Z_INT;
    break;
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_i32:
    fcCode = FC_CONTINUE;
    break;
  case AMDGPU::IF_LOGICALZ_f32:
  default:
    // No flow-control code exists for these opcodes.
    abort();
    break;
  }
  EmitByte(fcCode);
 }
 /// EmitNullBytes - Write \p byteCount zero bytes to the output stream.
 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
 {
  unsigned int remaining = byteCount;
  while (remaining != 0) {
    EmitByte(0);
    --remaining;
  }
 }
 /// EmitByte - Write the low eight bits of \p byte to the output stream.
 void R600CodeEmitter::EmitByte(unsigned int byte)
 {
  const uint8_t lowByte = static_cast<uint8_t>(byte & 0xff);
  _OS.write(lowByte);
 }
 /// EmitTwoBytes - Write the low sixteen bits of \p bytes to the output
 /// stream, low byte first.
 ///
 /// The parameter type is spelled uint32_t to match the declaration in the
 /// class body, so the definition still matches on platforms where uint32_t
 /// is not a typedef of unsigned int.
 void R600CodeEmitter::EmitTwoBytes(uint32_t bytes)
 {
  _OS.write((uint8_t) (bytes & 0xff));
  _OS.write((uint8_t) ((bytes >> 8) & 0xff));
 }
 /// Emit - Write a 32-bit value to the output stream as four bytes, least
 /// significant byte first.
 void R600CodeEmitter::Emit(uint32_t value)
 {
  for (unsigned shift = 0; shift < 32; shift += 8) {
    const uint8_t nextByte = static_cast<uint8_t>((value >> shift) & 0xff);
    _OS.write(nextByte);
  }
 }
 /// Emit - Write a 64-bit value to the output stream as eight bytes, least
 /// significant byte first.
 void R600CodeEmitter::Emit(uint64_t value)
 {
  for (unsigned shift = 0; shift < 64; shift += 8) {
    EmitByte((value >> shift) & 0xff);
  }
 }
 /// getHWReg - Return the hardware index of \p regNo: its encoding value,
 /// plus 512 when the register belongs to the R600_CReg32 class.
 unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
 {
  const unsigned encoding = TRI->getEncodingValue(regNo);
  const bool isConstReg = AMDGPU::R600_CReg32RegClass.contains(regNo);
  return isConstReg ? encoding + 512 : encoding;
 }
 /// getMachineOpValue - Return the value that encodes operand \p MO: the
 /// hardware register index for a register operand, otherwise the operand's
 /// immediate value.  \p MI is not used.
 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const
 {
  if (!MO.isReg()) {
    return MO.getImm();
  }
  return getHWReg(MO.getReg());
 }
 #include "AMDGPUGenCodeEmitter.inc"
--- a/lib/Target/AMDGPU/R600GenRegisterInfo.pl
+++ b/lib/Target/AMDGPU/R600GenRegisterInfo.pl
@ -0,0 +1,190 @@
 #===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
 #
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 #
 #===------------------------------------------------------------------------===#
 #
 # This perl script prints to stdout the .td code to be used as
 # R600RegisterInfo.td.  It also generates a file called R600HwRegInfo.include,
 # which contains helper functions for determining the hw encoding of registers.
 #
 #===------------------------------------------------------------------------===#
 use strict;
 use warnings;
 # Number of constant (C) and temporary (T) registers.  Each register has four
 # channels, so four times as many 32-bit definitions are generated.
 use constant CONST_REG_COUNT => 100;
 use constant TEMP_REG_COUNT => 128;
 # Highest register indices.  Neither variable is read again in this script.
 my $CREG_MAX = CONST_REG_COUNT - 1;
 my $TREG_MAX = TEMP_REG_COUNT - 1;
 # Print the TableGen classes that every generated register definition uses.
 print <<STRING;
 class R600Reg <string name, bits<16> encoding> : Register<name> {
  let Namespace = "AMDGPU";
  let HWEncoding = encoding;
 }
 class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
    RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
  let HWEncoding = encoding;
 }
 STRING
 # File-scope loop counter shared by the loops that do not declare their own.
 my $i;
 ### REG DEFS ###
 # Print the 32-bit constant and temporary register definitions and keep the
 # generated names for the register class lists below.
 my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
 my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
 # Names of the 128-bit temporary registers and of their X channel registers.
 my @t128reg;
 my @treg_x;
 for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
  my $name = "T$i\_XYZW";
  print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W], $i >;\n};
  $t128reg[$i] = $name;
  $treg_x[$i] = "T$i\_X";
  # Append a newline to every tenth name so that the joined lists are wrapped.
  if ($i % 10 == 0) {
    $t128reg[$i] .= "\n";
    $treg_x[$i] .= "\n";
  }
 }
 # Comma-separated name lists that are interpolated into the classes below.
 my $treg_string = join(",", @treg_list);
 my $creg_list = join(",", @creg_list);
 my $t128_string = join(",", @t128reg);
 my $treg_x_string = join(",", @treg_x);
 # Print the special registers and the register classes.
 print <<STRING;
 class RegSet <dag s> {
  dag set = s;
 }
 def ZERO : R600Reg<"0.0", 248>;
 def ONE : R600Reg<"1.0", 249>;
 def NEG_ONE : R600Reg<"-1.0", 249>;
 def ONE_INT : R600Reg<"1", 250>;
 def HALF : R600Reg<"0.5", 252>;
 def NEG_HALF : R600Reg<"-0.5", 252>;
 def PV_X : R600Reg<"pv.x", 254>;
 def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
 def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
    $creg_list)>;
 def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
    $treg_string)>;
 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
    $treg_x_string)>;
 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
    R600_TReg32,
    R600_CReg32,
    ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
 def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
    $t128_string)>
 {
  let CopyCost = -1;
 }
 STRING
 # %index_map groups register names by hardware index and %chan_map groups
 # them by channel letter.  Only %chan_map is read later in this script;
 # %index_map is filled but never used.
 my %index_map;
 my %chan_map;
 for ($i = 0; $i <= $#creg_list; $i++) {
  push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
  push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
 }
 for ($i = 0; $i <= $#treg_list; $i++) {
  push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
  push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
 }
 # The 128-bit registers are all filed under channel X.
 for ($i = 0; $i <= $#t128reg; $i++) {
  push(@{$index_map{$i}}, $t128reg[$i]);
  push(@{$chan_map{'X'}}, $t128reg[$i]);
 }
 # Write R600HwRegInfo.include: a switch that maps every register to the
 # numeric value of its channel (X=0, Y=1, Z=2, W=3).  Fail loudly if the file
 # cannot be created instead of silently printing to a closed handle.
 open(OUTFILE, ">", "R600HwRegInfo.include")
    or die("Cannot open R600HwRegInfo.include for writing: $!");
 print OUTFILE <<STRING;
 unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
 {
  switch(reg) {
  default: assert(!"Unknown register"); return 0;
 STRING
 # Iterate over the channels in sorted order so that the generated file is
 # identical from run to run (hash order is not stable).
 foreach my $key (sort(keys(%chan_map))) {
  foreach my $reg (@{$chan_map{$key}}) {
    # Strip the newline that was appended to every tenth name.
    chomp($reg);
    print OUTFILE " case AMDGPU::$reg:\n";
  }
  my $val;
  if ($key eq 'X') {
    $val = 0;
  } elsif ($key eq 'Y') {
    $val = 1;
  } elsif ($key eq 'Z') {
    $val = 2;
  } elsif ($key eq 'W') {
    $val = 3;
  } else {
    die("Unknown chan value; $key");
  }
  print OUTFILE "    return $val;\n\n";
 }
 print OUTFILE "  }\n}\n\n";
 close(OUTFILE) or die("Cannot close R600HwRegInfo.include: $!");
 # print_reg_defs($count, $prefix)
 #
 # Print $count R600Reg definitions named <prefix><hw index>_<channel> to
 # stdout and return the list of generated names.  A newline is appended to
 # every tenth name so that a list joined from the result is wrapped.
 sub print_reg_defs {
  my ($count, $prefix) = @_;
  my @reg_list;
  # Use a private counter so that the caller's $i is left untouched.
  for (my $i = 0; $i < $count; $i++) {
    my $hw_index = get_hw_index($i);
    my $chan= get_chan_str($i);
    my $name = "$prefix$hw_index\_$chan";
    print qq{def $name : R600Reg <"$prefix$hw_index.$chan", $hw_index>;\n};
    $reg_list[$i] = $name;
    if ($i % 10 == 0) {
        $reg_list[$i] .= "\n";
    }
  }
  return @reg_list;
 }
 #Helper functions
 # get_hw_index($index): hardware register index of flat register number
 # $index, i.e. $index divided by four and truncated to an integer.
 sub get_hw_index {
  my $flat_index = shift;
  my $hw_index = int($flat_index / 4);
  return $hw_index;
 }
 # get_chan_str($index): channel letter ('X', 'Y', 'Z' or 'W') of flat
 # register number $index, selected by $index modulo four.
 sub get_chan_str {
  my $flat_index = shift;
  my @letters = ('X', 'Y', 'Z', 'W');
  my $chan = $flat_index % 4;
  my $letter = $letters[$chan];
  die("Unknown chan value: $chan") unless defined($letter);
  return $letter;
 }
--- a/lib/Target/AMDGPU/R600HwRegInfo.include
+++ b/lib/Target/AMDGPU/R600HwRegInfo.include
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@ -0,0 +1,286 @@
 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
 // is mostly EmitInstrWithCustomInserter().
 //
 //===----------------------------------------------------------------------===//
 #include "R600ISelLowering.h"
 #include "AMDGPUUtil.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 /// Construct the R600 lowering object: register the legal value types with
 /// their register classes and declare which operations need expansion or
 /// custom lowering.
 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
 {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // Scalars live in R600_Reg32, four-element vectors in R600_Reg128.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();
  setOperationAction(ISD::FSUB, MVT::f32, Expand);
  // ROTL is lowered by LowerROTL().
  setOperationAction(ISD::ROTL, MVT::i32, Custom);
  setSchedulingPreference(Sched::VLIW);
 }
 /// EmitInstrWithCustomInserter - Expand the R600 pseudo instruction \p MI
 /// in place.
 ///
 /// The replacement instructions are inserted in front of \p MI, which is
 /// then erased (MASK_WRITE is the exception: it only flags the defining
 /// operand and is kept).  Opcodes that are not handled here are forwarded to
 /// AMDGPUTargetLowering.
 ///
 /// \returns the basic block that contains the expanded code.
 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
 {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  // Every instruction built below is inserted before I, so the debug
  // location found at I is the same for all of them.
  const DebugLoc DL = BB->findDebugLoc(I);
  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // Work-group and work-item ids are read from fixed live-in registers.
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  // Implicit parameters; the last argument is the dword offset to read.
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;
  // CLAMP, FABS and FNEG become a MOV whose operand carries a modifier flag.
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, DL, TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, DL, TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, DL, TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::R600_LOAD_CONST:
    {
      // Copy from the constant register selected by the immediate operand.
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, DL, TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }
  case AMDGPU::LOAD_INPUT:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      addLiveIn(MI, MF, MRI, TII,
                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
      break;
    }
  case AMDGPU::MASK_WRITE:
    {
      // Flag the operand that defines the masked virtual register.
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              &AMDGPU::R600_TReg32RegClass);
      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, DL, TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addImm(2);
      BuildMI(*BB, I, DL, TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue);
      BuildMI(*BB, I, DL, TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }
  case AMDGPU::STORE_OUTPUT:
    {
      // Copy the value into the selected output register and make sure that
      // the register is recorded as live-out.
      int64_t OutputIndex = MI->getOperand(1).getImm();
      unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
      BuildMI(*BB, I, DL, TII->get(AMDGPU::COPY), OutputReg)
                  .addOperand(MI->getOperand(0));
      if (!MRI.isLiveOut(OutputReg)) {
        MRI.addLiveOut(OutputReg);
      }
      break;
    }
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    {
      // Both pseudos expand to the same gradient setup; only the opcode of
      // the final sample instruction differs.
      const unsigned SampleOpcode = (MI->getOpcode() == AMDGPU::TXD) ?
                                    AMDGPU::TEX_SAMPLE_G :
                                    AMDGPU::TEX_SAMPLE_C_G;
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      BuildMI(*BB, I, DL, TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, DL, TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      // The gradient results are attached as implicit uses of the sample.
      BuildMI(*BB, I, DL, TII->get(SampleOpcode))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  }
  MI->eraseFromParent();
  return BB;
 }
 /// lowerImplicitParameter - Replace the implicit-parameter pseudo \p MI with
 /// a MOV that loads the byte offset (\p dword_offset * 4) into a new pointer
 /// register, followed by a VTX_READ_PARAM_i32_eg through that pointer.  The
 /// result register of \p MI is constrained to R600_TReg32_X.  \p MI itself is
 /// not erased here.
 void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
 {
  MachineBasicBlock::iterator InsertPt = *MI;
  const TargetRegisterClass *XRegClass = &AMDGPU::R600_TReg32_XRegClass;
  unsigned PtrReg = MRI.createVirtualRegister(XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), XRegClass);
  // PtrReg = byte offset of the requested dword.
  BuildMI(BB, InsertPt, BB.findDebugLoc(InsertPt), TII->get(AMDGPU::MOV),
          PtrReg)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addImm(dword_offset * 4);
  // Result = parameter read at PtrReg, with an additional offset of 0.
  BuildMI(BB, InsertPt, BB.findDebugLoc(InsertPt),
          TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(0);
 }
 //===----------------------------------------------------------------------===//
 // Custom DAG Lowering Operations
 //===----------------------------------------------------------------------===//
 /// LowerOperation - Custom-lower \p Op.  ROTL is handled here; every other
 /// opcode is forwarded to AMDGPUTargetLowering.
 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
 {
  if (Op.getOpcode() == ISD::ROTL) {
    return LowerROTL(Op, DAG);
  }
  return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 }
 /// LowerROTL - Lower rotl(x, n) to BITALIGN(x, x, 32 - n).
 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
 {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue Rotated = Op.getOperand(0);
  // Third BITALIGN operand: 32 minus the rotate amount.
  SDValue Amount = DAG.getNode(ISD::SUB, DL, VT,
                               DAG.getConstant(32, MVT::i32),
                               Op.getOperand(1));
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, Rotated, Rotated, Amount);
 }
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@ -0,0 +1,48 @@
 //===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // R600 DAG Lowering interface definition
 //
 //===----------------------------------------------------------------------===//
 #ifndef R600ISELLOWERING_H
 #define R600ISELLOWERING_H
 #include "AMDGPUISelLowering.h"
 namespace llvm {
 class R600InstrInfo;
 /// R600TargetLowering - R600 DAG lowering interface, layered on top of
 /// AMDGPUTargetLowering.
 class R600TargetLowering : public AMDGPUTargetLowering
 {
 public:
  R600TargetLowering(TargetMachine &TM);
  /// EmitInstrWithCustomInserter - Expand the pseudo instruction MI inside BB
  /// and return the block in which lowering continues.
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock * BB) const;
  /// LowerOperation - Custom DAG lowering entry point.  ISD::ROTL is lowered
  /// by LowerROTL; all other opcodes are passed to the base class.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 private:
  /// Instruction info used to look up instruction descriptors when building
  /// machine instructions.
  const R600InstrInfo * TII;
  /// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
  /// that are stored in the first nine dwords of a Vertex Buffer.  These
  /// implicit parameters are represented by pseudo instructions, which are
  /// lowered to VTX_READ instructions by this function.
  void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
      MachineRegisterInfo & MRI, unsigned dword_offset) const;
  /// LowerROTL - Lower ROTL opcode to BITALIGN
  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 };
 } // End namespace llvm;
 #endif // R600ISELLOWERING_H
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@ -0,0 +1,105 @@
 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // R600 Implementation of TargetInstrInfo.
 //
 //===----------------------------------------------------------------------===//
 #include "R600InstrInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "AMDILSubtarget.h"
 #include "R600RegisterInfo.h"
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
 using namespace llvm;
 /// Construct the R600 instruction info.  The embedded register info RI is
 /// built from the same target machine and a reference to this object.
 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this)
  { }
 /// Return the R600RegisterInfo owned by this instruction info.
 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
 {
  return RI;
 }
 /// Return true when the R600_InstFlag::TRIG bit is set in the TSFlags of
 /// \p MI's opcode.
 bool R600InstrInfo::isTrig(const MachineInstr &MI) const
 {
  const unsigned Opcode = MI.getOpcode();
  return (get(Opcode).TSFlags & R600_InstFlag::TRIG) != 0;
 }
 /// Return true when the R600_InstFlag::VECTOR bit is set in the TSFlags of
 /// \p MI's opcode.
 bool R600InstrInfo::isVector(const MachineInstr &MI) const
 {
  const unsigned Opcode = MI.getOpcode();
  return (get(Opcode).TSFlags & R600_InstFlag::VECTOR) != 0;
 }
 /// Emit a physical register copy from \p SrcReg to \p DestReg before \p MI.
 ///
 /// Scalar copies become a single MOV that honours \p KillSrc.  When both
 /// registers belong to R600_Reg128, the copy is split into one MOV per
 /// sub-register (sel_x, sel_y, sel_z, sel_w), each of which also carries an
 /// implicit def of the full destination register.  Copies that mix a 128-bit
 /// register with a non-128-bit one are not supported and trip the assert.
 void
 R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
 {
  const bool DestIsVec4 = AMDGPU::R600_Reg128RegClass.contains(DestReg);
  const bool SrcIsVec4 = AMDGPU::R600_Reg128RegClass.contains(SrcReg);

  if (!(DestIsVec4 && SrcIsVec4)) {
    /* Scalar path: neither side may be a vec4 register. */
    assert(!DestIsVec4 && !SrcIsVec4);
    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  /* vec4 path: copy channel by channel. */
  const unsigned Channels[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
                                AMDGPU::sel_z, AMDGPU::sel_w};
  for (unsigned Chan = 0; Chan != 4; ++Chan) {
    const unsigned DestSub = RI.getSubReg(DestReg, Channels[Chan]);
    const unsigned SrcSub = RI.getSubReg(SrcReg, Channels[Chan]);
    BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
            .addReg(DestSub, RegState::Define)
            .addReg(SrcSub)
            .addReg(DestReg, RegState::Define | RegState::Implicit);
  }
 }
 /// Create (but do not insert) a MOV that defines \p DstReg from the literal
 /// register ALU_LITERAL_X with \p Imm as the immediate operand.  The new
 /// instruction is allocated in \p MF with an empty DebugLoc.
 MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                             unsigned DstReg, int64_t Imm) const
 {
  MachineInstr *Mov = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(Mov)
      .addReg(DstReg, RegState::Define)
      .addReg(AMDGPU::ALU_LITERAL_X)
      .addImm(Imm);
  return Mov;
 }
 /// Return the opcode this target reports for getIEQOpcode(): SETE_INT.
 unsigned R600InstrInfo::getIEQOpcode() const
 {
  return AMDGPU::SETE_INT;
 }
 /// Return true when \p Opcode is one of the move opcodes MOV, MOV_IMM_F32 or
 /// MOV_IMM_I32.
 bool R600InstrInfo::isMov(unsigned Opcode) const
 {
  return Opcode == AMDGPU::MOV ||
         Opcode == AMDGPU::MOV_IMM_F32 ||
         Opcode == AMDGPU::MOV_IMM_I32;
 }
 /// Create a DFA packetizer from the subtarget of \p TM, fed with the target
 /// machine's instruction itinerary data.  \p DAG is not used.
 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
 {
  const AMDILSubtarget &Subtarget = TM->getSubtarget<AMDILSubtarget>();
  return Subtarget.createDFAPacketizer(TM->getInstrItineraryData());
 }
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@ -0,0 +1,75 @@
 //===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Interface definition for R600InstrInfo
 //
 //===----------------------------------------------------------------------===//
 #ifndef R600INSTRUCTIONINFO_H_
 #define R600INSTRUCTIONINFO_H_
 #include "AMDIL.h"
 #include "AMDILInstrInfo.h"
 #include "R600RegisterInfo.h"
 #include <map>
 namespace llvm {
  class AMDGPUTargetMachine;
  class DFAPacketizer;
  class ScheduleDAG;
  class MachineFunction;
  class MachineInstr;
  class MachineInstrBuilder;
  /// R600InstrInfo - R600 implementation of the target instruction info,
  /// layered on top of AMDGPUInstrInfo.
  class R600InstrInfo : public AMDGPUInstrInfo {
  private:
  /// Register info owned by this object; exposed through getRegisterInfo().
  const R600RegisterInfo RI;
  public:
  explicit R600InstrInfo(AMDGPUTargetMachine &tm);
  /// getRegisterInfo - Return the embedded R600RegisterInfo.
  const R600RegisterInfo &getRegisterInfo() const;
  /// copyPhysReg - Emit MOV instruction(s) before MI that copy SrcReg into
  /// DestReg.
  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const;
  /// isTrig - True when the instruction's TSFlags have R600_InstFlag::TRIG
  /// set.
  bool isTrig(const MachineInstr &MI) const;
  /// isVector - Vector instructions are instructions that must fill all
  /// instruction slots within an instruction group.
  bool isVector(const MachineInstr &MI) const;
  /// getMovImmInstr - Create a MOV of the immediate Imm into DstReg.  The
  /// instruction is created in MF but not inserted into a block.
  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                        int64_t Imm) const;
  /// getIEQOpcode - Return the SETE_INT opcode.
  virtual unsigned getIEQOpcode() const;
  /// isMov - True for the MOV, MOV_IMM_F32 and MOV_IMM_I32 opcodes.
  virtual bool isMov(unsigned Opcode) const;
  /// CreateTargetScheduleState - Create the DFA packetizer for the subtarget
  /// of TM.
  DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                           const ScheduleDAG *DAG) const;
 };
 } // End llvm namespace
 /// Single-bit instruction flags.  Each enumerator occupies its own bit so the
 /// values can be combined and tested with bitwise operators.
 namespace R600_InstFlag {
  enum TIF {
    TRANS_ONLY = 0x01,  // bit 0
    TEX        = 0x02,  // bit 1
    REDUCTION  = 0x04,  // bit 2
    FC         = 0x08,  // bit 3
    TRIG       = 0x10,  // bit 4
    OP3        = 0x20,  // bit 5
    VECTOR     = 0x40   // bit 6
  };
 }
 #endif // R600INSTRUCTIONINFO_H_
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@ -0,0 +1,16 @@
 //===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // R600 Intrinsic Definitions
 //
 //===----------------------------------------------------------------------===//
 // Intrinsics defined in this block use the "R600" target prefix.
 let TargetPrefix = "R600", isTarget = 1 in {
  // int_R600_load_input: takes one i32 operand, returns a float, and is
  // marked IntrNoMem.
  def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
 }
--- a/lib/Target/AMDGPU/R600KernelParameters.cpp
+++ b/lib/Target/AMDGPU/R600KernelParameters.cpp
@ -0,0 +1,462 @@
 //===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This pass lowers kernel function arguments to loads from the vertex buffer.
 //
 // Kernel arguments are stored in the vertex buffer at an offset of 9 dwords,
 // so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from
 // VTX_BUFFER[10], etc.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "AMDIL.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/TypeBuilder.h"
 #include <map>
 #include <set>
 using namespace llvm;
 namespace {
 // Presumably the constant cache size in dwords (per the _DW suffix) - TODO
 // confirm.  Not referenced by any code visible in this file.
 #define CONSTANT_CACHE_SIZE_DW 127
 class R600KernelParameters : public FunctionPass {
  const TargetData *TD;
  LLVMContext* Context;
  Module *Mod;
  struct Param {
    Param() : Val(NULL), PtrVal(NULL), OffsetInDW(0), SizeInDW(0),
              IsIndirect(true), SpecialID(0) {}
    Value* Val;
    Value* PtrVal;
    int OffsetInDW;
    int SizeInDW;
    bool IsIndirect;
    std::string SpecialType;
    int SpecialID;
    int End() { return OffsetInDW + SizeInDW; }
    // The first 9 dwords are reserved for the grid sizes.
    int getRatOffset() { return 9 + OffsetInDW; }
  };
  std::vector<Param> Params;
  bool IsOpenCLKernel(const Function *Fun);
  int getLastSpecialID(const std::string& TypeName);
  int getListSize();
  void AddParam(Argument *Arg);
  int CalculateArgumentSize(Argument *Arg);
  void RunAna(Function *Fun);
  void Replace(Function *Fun);
  bool IsIndirect(Value *Val, std::set<Value*> &Visited);
  void Propagate(Function* Fun);
  void Propagate(Value *V, const Twine &Name, bool IsIndirect = true);
  Value* ConstantRead(Function *Fun, Param &P);
  Value* handleSpecial(Function *Fun, Param &P);
  bool IsSpecialType(Type *T);
  std::string getSpecialTypeName(Type *T);
 public:
  static char ID;
  R600KernelParameters() : FunctionPass(ID) {}
  R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
  bool runOnFunction (Function &F);
  void getAnalysisUsage(AnalysisUsage &AU) const;
  const char *getPassName() const;
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);
 };
 // Storage for the static pass ID member.
 char R600KernelParameters::ID = 0;
 // Register the pass under the command-line name "kerparam".
 static RegisterPass<R600KernelParameters> X("kerparam",
                            "OpenCL Kernel Parameter conversion", false, false);
 bool R600KernelParameters::IsOpenCLKernel(const Function* Fun) {
  Module *Mod = const_cast<Function*>(Fun)->getParent();
  NamedMDNode * MD = Mod->getOrInsertNamedMetadata("opencl.kernels");
  if (!MD or !MD->getNumOperands()) {
    return false;
  }
  for (int i = 0; i < int(MD->getNumOperands()); i++) {
    if (!MD->getOperand(i) or !MD->getOperand(i)->getOperand(0)) {
      continue;
    }
    assert(MD->getOperand(i)->getNumOperands() == 1);
    if (MD->getOperand(i)->getOperand(0)->getName() == Fun->getName()) {
      return true;
    }
  }
  return false;
 }
 int R600KernelParameters::getLastSpecialID(const std::string &TypeName) {
  int LastID = -1;
  for (std::vector<Param>::iterator i = Params.begin(); i != Params.end(); i++) {
    if (i->SpecialType == TypeName) {
      LastID = i->SpecialID;
    }
  }
  return LastID;
 }
 /// Return the size in dwords of the argument list collected so far: the end
 /// offset of the last entry in Params, or 0 when Params is empty.
 int R600KernelParameters::getListSize() {
  return Params.empty() ? 0 : Params.back().End();
 }
 /// Decide whether the parameter reached through \p Val must be accessed
 /// indirectly.  The analysis is currently disabled: the function always
 /// returns true and neither \p Val nor \p Visited is inspected.
 bool R600KernelParameters::IsIndirect(Value *Val, std::set<Value*> &Visited) {
  //XXX Direct parameters are not supported yet, so return true here.
  return true;
  // NOTE(review): the disabled implementation below would not compile as
  // written - it spells the class `getElementPtrInst` and the final loop
  // tests an undeclared `i`.  Fix both before re-enabling it.
 #if 0
  if (isa<LoadInst>(Val)) {
    return false;
  }
  if (isa<IntegerType>(Val->getType())) {
    assert(0 and "Internal error");
    return false;
  }
  if (Visited.count(Val)) {
    return false;
  }
  Visited.insert(Val);
  if (isa<getElementPtrInst>(Val)) {
    getElementPtrInst* GEP = dyn_cast<getElementPtrInst>(Val);
    getElementPtrInst::op_iterator I = GEP->op_begin();
    for (++I; I != GEP->op_end(); ++I) {
      if (!isa<Constant>(*I)) {
        return true;
      }
    }
  }
  for (Value::use_iterator I = Val->use_begin(); i != Val->use_end(); ++I) {
    Value* V2 = dyn_cast<Value>(*I);
    if (V2) {
      if (IsIndirect(V2, Visited)) {
        return true;
      }
    }
  }
  return false;
 #endif
 }
 /// Append a Param record for \p Arg to Params.  The new entry starts where
 /// the current list ends and has the size computed by
 /// CalculateArgumentSize().  For byval pointer arguments the IsIndirect flag
 /// is taken from IsIndirect(); all other arguments keep the Param default.
 void R600KernelParameters::AddParam(Argument *Arg) {
  Param Entry;
  Entry.Val = dyn_cast<Value>(Arg);
  Entry.OffsetInDW = getListSize();
  Entry.SizeInDW = CalculateArgumentSize(Arg);

  const bool IsByValPointer =
      isa<PointerType>(Arg->getType()) && Arg->hasByValAttr();
  if (IsByValPointer) {
    std::set<Value*> Seen;
    Entry.IsIndirect = IsIndirect(Entry.Val, Seen);
  }

  Params.push_back(Entry);
 }
 /// Return the store size of \p Arg rounded up to whole dwords.  For byval
 /// pointer arguments the size of the pointee type is used instead of the
 /// size of the pointer.  Asserts that the result is non-zero.
 int R600KernelParameters::CalculateArgumentSize(Argument *Arg) {
  Type *SizedTy = Arg->getType();
  if (Arg->hasByValAttr()) {
    if (PointerType *PtrTy = dyn_cast<PointerType>(SizedTy)) {
      SizedTy = PtrTy->getElementType();
    }
  }

  // Round the byte size up to the next multiple of 4 and convert to dwords.
  const int SizeInDW = (TD->getTypeStoreSize(SizedTy) + 3) / 4;
  assert(SizeInDW);
  return SizeInDW;
 }
 /// Collect a Param record for every argument of the kernel \p Fun, in
 /// declaration order.  \p Fun must be an OpenCL kernel.
 void R600KernelParameters::RunAna(Function* Fun) {
  assert(IsOpenCLKernel(Fun));
  for (Function::arg_iterator Arg = Fun->arg_begin(), ArgEnd = Fun->arg_end();
       Arg != ArgEnd; ++Arg) {
    AddParam(Arg);
  }
 }
 /// Replace every recorded argument with its lowered value: special-typed
 /// arguments go through handleSpecial(), everything else through
 /// ConstantRead().  Arguments for which no replacement is produced are left
 /// untouched.
 void R600KernelParameters::Replace(Function* Fun) {
  for (std::vector<Param>::iterator P = Params.begin(), E = Params.end();
       P != E; ++P) {
    Value *Lowered = IsSpecialType(P->Val->getType())
                         ? handleSpecial(Fun, *P)
                         : ConstantRead(Fun, *P);
    if (!Lowered) {
      continue;
    }
    P->Val->replaceAllUsesWith(Lowered);
  }
 }
 /// For every recorded argument that has a parameter pointer (PtrVal), push
 /// the parameter address space through the users of that pointer.
 void R600KernelParameters::Propagate(Function* Fun) {
  for (unsigned Idx = 0, Count = Params.size(); Idx != Count; ++Idx) {
    Param &Entry = Params[Idx];
    if (!Entry.PtrVal) {
      continue;
    }
    Propagate(Entry.PtrVal, Entry.Val->getName(), Entry.IsIndirect);
  }
 }
 /// Recursively rewrite \p V and its users so that GEPs and loads reached
 /// from a parameter pointer use the parameter address space.
 ///
 /// \param V          value to process (a GEP, a load, or any other user).
 /// \param Name       name given to the replacement instructions.
 /// \param IsIndirect selects PARAM_I_ADDRESS (true) or PARAM_D_ADDRESS.
 void R600KernelParameters::Propagate(Value* V, const Twine& Name, bool IsIndirect) {
  LoadInst* Load = dyn_cast<LoadInst>(V);
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
  unsigned Addrspace;
  if (IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  }  else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }
  // A GEP whose result is in the wrong address space is rebuilt on top of a
  // pointer operand in the right address space (bitcast if necessary), with
  // the same indices and inbounds flag.  The old GEP is then replaced and
  // erased, and processing continues with the new one.
  if (GEP and GEP->getType()->getAddressSpace() != Addrspace) {
    Value *Op = GEP->getPointerOperand();
    if (dyn_cast<PointerType>(Op->getType())->getAddressSpace() != Addrspace) {
      Op = new BitCastInst(Op, PointerType::get(dyn_cast<PointerType>(
                           Op->getType())->getElementType(), Addrspace),
                           Name, dyn_cast<Instruction>(V));
    }
    // Note: this local vector shadows the Params member inside this scope.
    std::vector<Value*> Params(GEP->idx_begin(), GEP->idx_end());
    GetElementPtrInst* GEP2 = GetElementPtrInst::Create(Op, Params, Name,
                                                      dyn_cast<Instruction>(V));
    GEP2->setIsInBounds(GEP->isInBounds());
    V = dyn_cast<Value>(GEP2);
    GEP->replaceAllUsesWith(GEP2);
    GEP->eraseFromParent();
    Load = NULL;
  }
  // A load is the end of the chain: re-create it from a pointer in the right
  // address space if needed, then stop without visiting the load's users.
  if (Load) {
    ///normally at this point we have the right address space
    if (Load->getPointerAddressSpace() != Addrspace) {
      Value *OrigPtr = Load->getPointerOperand();
      PointerType *OrigPtrType = dyn_cast<PointerType>(OrigPtr->getType());
      Type* NewPtrType = PointerType::get(OrigPtrType->getElementType(),
                                            Addrspace);
      Value* NewPtr = OrigPtr;
      if (OrigPtr->getType() != NewPtrType) {
        NewPtr = new BitCastInst(OrigPtr, NewPtrType, "prop_cast", Load);
      }
      Value* new_Load = new LoadInst(NewPtr, Name, Load);
      Load->replaceAllUsesWith(new_Load);
      Load->eraseFromParent();
    }
    return;
  }
  // Recurse into a snapshot of the users; presumably a copy is taken because
  // the recursive calls replace and erase instructions, which would disturb
  // iteration over the live use list.
  std::vector<User*> Users(V->use_begin(), V->use_end());
  for (int i = 0; i < int(Users.size()); i++) {
    Value* V2 = dyn_cast<Value>(Users[i]);
    if (V2) {
      Propagate(V2, Name, IsIndirect);
    }
  }
 }
 /// Build the replacement value for the ordinary (non-special) argument
 /// described by \p P.  All new instructions are inserted before the first
 /// instruction of \p Fun's entry block.
 ///
 /// \returns NULL when the argument has no uses; for byval pointer arguments
 ///          a pointer into the parameter address space (also stored in
 ///          P.PtrVal); otherwise a load of the argument value from the
 ///          parameter address space.
 Value* R600KernelParameters::ConstantRead(Function *Fun, Param &P) {
  assert(Fun->front().begin() != Fun->front().end());
  Instruction *FirstInst = Fun->front().begin();
  IRBuilder <> Builder (FirstInst);
 /* Offsets come from Param::getRatOffset(), which skips the 9 reserved
    dwords at the start of the buffer. */
  if (!P.Val->hasNUsesOrMore(1)) {
    return NULL;
  }
  unsigned Addrspace;
  if (P.IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  } else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }
  Argument *Arg = dyn_cast<Argument>(P.Val);
  Type * ArgType = P.Val->getType();
  PointerType * ArgPtrType = dyn_cast<PointerType>(P.Val->getType());
  if (ArgPtrType and Arg->hasByValAttr()) {
    // byval pointer: index an i32 null pointer by the dword offset, then cast
    // the result to a pointer to the pointee type in the parameter space.
    Value* ParamAddrSpacePtr = ConstantPointerNull::get(
                                    PointerType::get(Type::getInt32Ty(*Context),
                                    Addrspace));
    Value* ParamPtr = GetElementPtrInst::Create(ParamAddrSpacePtr,
                                    ConstantInt::get(Type::getInt32Ty(*Context),
                                    P.getRatOffset()), Arg->getName(),
                                    FirstInst);
    ParamPtr = new BitCastInst(ParamPtr,
                                PointerType::get(ArgPtrType->getElementType(),
                                                 Addrspace),
                                Arg->getName(), FirstInst);
    P.PtrVal = ParamPtr;
    return ParamPtr;
  } else {
    // Plain value: load it through a null pointer of the argument's own type.
    // NOTE(review): this GEP is indexed in units of ArgType while
    // getRatOffset() is expressed in dwords - confirm this is intended for
    // arguments that are not exactly 32 bits wide.
    Value *ParamAddrSpacePtr = ConstantPointerNull::get(PointerType::get(
                                                        ArgType, Addrspace));
    Value *ParamPtr = Builder.CreateGEP(ParamAddrSpacePtr,
             ConstantInt::get(Type::getInt32Ty(*Context), P.getRatOffset()),
                              Arg->getName());
    Value *Param_Value = Builder.CreateLoad(ParamPtr, Arg->getName());
    return Param_Value;
  }
 }
 /// Lower an image or sampler argument to a resource ID.
 ///
 /// Images (image2d_t / image3d_t) share one ID sequence that starts at 2;
 /// samplers (sampler_t) use their own sequence starting at 0.  The chosen
 /// type name and ID are recorded in \p P, and an inttoptr of the ID to the
 /// argument's type is inserted before the first instruction of \p Fun.
 ///
 /// \returns the inttoptr instruction, or NULL for an unrecognized special
 ///          type.
 Value* R600KernelParameters::handleSpecial(Function* Fun, Param& P) {
  const std::string TypeName = getSpecialTypeName(P.Val->getType());
  assert(!TypeName.empty());

  int PreviousID;
  int FirstID;
  if (TypeName == "image2d_t" || TypeName == "image3d_t") {
    PreviousID = std::max(getLastSpecialID("image2d_t"),
                          getLastSpecialID("image3d_t"));
    FirstID = 2; ///ID0 and ID1 are used internally by the driver
  } else if (TypeName == "sampler_t") {
    PreviousID = getLastSpecialID("sampler_t");
    FirstID = 0;
  } else {
    ///TODO: give some error message
    return NULL;
  }

  // -1 means no earlier argument of this kind has been assigned an ID yet.
  P.SpecialType = TypeName;
  P.SpecialID = (PreviousID == -1) ? FirstID : PreviousID + 1;

  Instruction *InsertBefore = Fun->front().begin();
  Value *IDConst = ConstantInt::get(Type::getInt32Ty(*Context), P.SpecialID);
  return new IntToPtrInst(IDConst, P.Val->getType(), "resourceID",
                          InsertBefore);
 }
 /// Return true when getSpecialTypeName() yields a non-empty name for \p T.
 bool R600KernelParameters::IsSpecialType(Type* T) {
  const std::string SpecialName = getSpecialTypeName(T);
  return !SpecialName.empty();
 }
 /// If \p T is a pointer to a struct whose name starts with
 /// "struct.opencl_builtin_type_", return the remainder of the struct name;
 /// otherwise return the empty string.
 std::string R600KernelParameters::getSpecialTypeName(Type* T) {
  PointerType *PtrTy = dyn_cast<PointerType>(T);
  if (!PtrTy) {
    return "";
  }
  StructType *StructTy = dyn_cast<StructType>(PtrTy->getElementType());
  if (!StructTy) {
    return "";
  }

  const std::string Prefix = "struct.opencl_builtin_type_";
  const std::string FullName = StructTy->getName().str();
  if (FullName.compare(0, Prefix.length(), Prefix) != 0) {
    return "";
  }
  // Everything after the prefix is the special type name.
  return FullName.substr(Prefix.length());
 }
 /// Lower the arguments of \p F when it is an OpenCL kernel.
 ///
 /// \returns false when \p F is not a kernel and is left untouched; true
 ///          otherwise, since the kernel body may have been rewritten.
 bool R600KernelParameters::runOnFunction (Function &F) {
  if (!IsOpenCLKernel(&F)) {
    return false;
  }
  // Params describes the arguments of a single kernel.  Drop the entries left
  // over from a previously processed kernel; otherwise their offsets would
  // shift this kernel's arguments and Replace()/Propagate() would revisit
  // values that belong to another function.
  Params.clear();
  RunAna(&F);
  Replace(&F);
  Propagate(&F);
  return true;
 }
 /// Report analysis usage: the FunctionPass defaults plus "preserves all".
 /// NOTE(review): this pass inserts, replaces and erases instructions (see
 /// ConstantRead and Propagate) - confirm that declaring every analysis
 /// preserved is intended.
 void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const {
  FunctionPass::getAnalysisUsage(AU);
  AU.setPreservesAll();
 }
 /// Return the human-readable name of this pass.
 const char *R600KernelParameters::getPassName() const {
  return "OpenCL Kernel parameter conversion to memory";
 }
 /// Remember the module being processed and its LLVMContext for later use.
 /// Always returns false.
 bool R600KernelParameters::doInitialization(Module &M) {
  Mod = &M;
  Context = &M.getContext();
  return false;
 }
 /// Nothing to finalize; always returns false.
 bool R600KernelParameters::doFinalization(Module &M) {
  return false;
 }
 } // End anonymous namespace
 /// Factory for the kernel-parameter lowering pass.  The returned pass is
 /// heap-allocated and uses \p TD to compute argument sizes.
 FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) {
  return new R600KernelParameters(TD);
 }
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@ -0,0 +1,16 @@
 //===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 #include "R600MachineFunctionInfo.h"
 using namespace llvm;
 /// Construct the per-function info.  \p MF is not used; ReservedRegs starts
 /// out empty.
 R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
  : MachineFunctionInfo()
  { }
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@ -0,0 +1,33 @@
 //===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // R600MachineFunctionInfo is used for keeping track of which registers have
 // been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
 //
 //===----------------------------------------------------------------------===//
 #ifndef R600MACHINEFUNCTIONINFO_H
 #define R600MACHINEFUNCTIONINFO_H
 #include "llvm/CodeGen/MachineFunction.h"
 #include <vector>
 namespace llvm {
 /// R600MachineFunctionInfo - Per-function state for R600: the list of
 /// registers that have been reserved for this function.
 class R600MachineFunctionInfo : public MachineFunctionInfo {
 public:
  R600MachineFunctionInfo(const MachineFunction &MF);
  /// Registers reserved in this function; R600RegisterInfo::getReservedRegs
  /// adds each of them to the reserved set.
  std::vector<unsigned> ReservedRegs;
 };
 } // End llvm namespace
 #endif //R600MACHINEFUNCTIONINFO_H
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@ -0,0 +1,88 @@
 //===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // The file contains the R600 implementation of the TargetRegisterInfo class.
 //
 //===----------------------------------------------------------------------===//
 #include "R600RegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600MachineFunctionInfo.h"
 using namespace llvm;
 /// Construct the R600 register info, keeping references to the target
 /// machine and the instruction info it was created with.
 R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
    const TargetInstrInfo &tii)
 : AMDGPURegisterInfo(tm, tii),
  TM(tm),
  TII(tii)
  { }
 /// Return the reserved-register bit vector for \p MF.  It contains:
 ///  - the fixed registers ZERO, HALF, ONE, ONE_INT, NEG_HALF, NEG_ONE, PV_X
 ///    and ALU_LITERAL_X,
 ///  - every register of the R600_CReg32 class, and
 ///  - the registers listed in the function's R600MachineFunctionInfo.
 BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
 {
  const unsigned FixedRegs[] = {
    AMDGPU::ZERO,     AMDGPU::HALF,    AMDGPU::ONE,  AMDGPU::ONE_INT,
    AMDGPU::NEG_HALF, AMDGPU::NEG_ONE, AMDGPU::PV_X, AMDGPU::ALU_LITERAL_X
  };
  const unsigned NumFixedRegs = sizeof(FixedRegs) / sizeof(FixedRegs[0]);

  BitVector Result(getNumRegs());
  for (unsigned Idx = 0; Idx != NumFixedRegs; ++Idx) {
    Result.set(FixedRegs[Idx]);
  }

  for (TargetRegisterClass::iterator
           Reg = AMDGPU::R600_CReg32RegClass.begin(),
           RegEnd = AMDGPU::R600_CReg32RegClass.end();
       Reg != RegEnd; ++Reg) {
    Result.set(*Reg);
  }

  const R600MachineFunctionInfo *FuncInfo =
      MF.getInfo<R600MachineFunctionInfo>();
  for (std::vector<unsigned>::const_iterator
           Reg = FuncInfo->ReservedRegs.begin(),
           RegEnd = FuncInfo->ReservedRegs.end();
       Reg != RegEnd; ++Reg) {
    Result.set(*Reg);
  }

  return Result;
 }
 /// Map an AMDIL register class to its R600 equivalent: GPRF32 and GPRI32
 /// both become R600_Reg32; any other class is returned unchanged.
 const TargetRegisterClass *
 R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
 {
  const unsigned ClassID = rc->getID();
  if (ClassID == AMDGPU::GPRF32RegClassID ||
      ClassID == AMDGPU::GPRI32RegClassID) {
    return &AMDGPU::R600_Reg32RegClass;
  }
  return rc;
 }
 /// Return the hardware channel encoding of \p reg.  The constant registers
 /// (ZERO, ONE, ONE_INT, NEG_ONE, HALF, NEG_HALF) and ALU_LITERAL_X report
 /// channel 0; every other register is looked up with getHWRegChanGen().
 unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
 {
  const bool UsesChannelZero =
      reg == AMDGPU::ZERO     || reg == AMDGPU::ONE      ||
      reg == AMDGPU::ONE_INT  || reg == AMDGPU::NEG_ONE  ||
      reg == AMDGPU::HALF     || reg == AMDGPU::NEG_HALF ||
      reg == AMDGPU::ALU_LITERAL_X;
  if (UsesChannelZero) {
    return 0;
  }
  return getHWRegChanGen(reg);
 }
 /// Return the register class the CFGStructurizer should use for values of
 /// type \p VT.  Every value type currently maps to R600_TReg32.
 const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
                                                                   MVT VT) const
 {
  (void)VT; // no per-type distinction yet
  return &AMDGPU::R600_TReg32RegClass;
 }
 #include "R600HwRegInfo.include"
--- a/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/lib/Target/AMDGPU/R600RegisterInfo.h
@ -0,0 +1,54 @@
 //===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Interface definition for R600RegisterInfo
 //
 //===----------------------------------------------------------------------===//
 #ifndef R600REGISTERINFO_H_
 #define R600REGISTERINFO_H_
 #include "AMDGPUTargetMachine.h"
 #include "AMDILRegisterInfo.h"
 namespace llvm {
 class R600TargetMachine;
 class TargetInstrInfo;
 /// R600RegisterInfo - R600 implementation of the target register info,
 /// layered on top of AMDGPURegisterInfo.
 struct R600RegisterInfo : public AMDGPURegisterInfo
 {
  /// Target machine and instruction info this object was constructed with.
  AMDGPUTargetMachine &TM;
  const TargetInstrInfo &TII;
  R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
  /// getReservedRegs - Return the reserved-register set for MF: the fixed
  /// constant/literal registers, the R600_CReg32 class and the registers
  /// recorded in the function's R600MachineFunctionInfo.
  virtual BitVector getReservedRegs(const MachineFunction &MF) const;
  /// getISARegClass - rc is an AMDIL reg class.  This function returns the
  /// R600 reg class that is equivalent to the given AMDIL reg class.
  virtual const TargetRegisterClass * getISARegClass(
    const TargetRegisterClass * rc) const;
  /// getHWRegChan - get the HW encoding for a register's channel.
  unsigned getHWRegChan(unsigned reg) const;
  /// getCFGStructurizerRegClass - get the register class of the specified
  /// type to use in the CFGStructurizer
  virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
 private:
  /// getHWRegChanGen - Generated function returns a register's channel
  /// encoding.
  unsigned getHWRegChanGen(unsigned reg) const;
 };
 } // End namespace llvm
 #endif // R600REGISTERINFO_H_
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
--- a/lib/Target/AMDGPU/R600Schedule.td
+++ b/lib/Target/AMDGPU/R600Schedule.td
@ -0,0 +1,36 @@
 //===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // R600 has a VLIW architecture.  On pre-cayman cards there are 5 instruction
 // slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS.  For cayman cards, the TRANS
 // slot has been removed. 
 //
 //===----------------------------------------------------------------------===//
 // Functional units: one per instruction slot described in the file header
 // (the four vector slots X/Y/Z/W and the TRANS slot).
 def ALU_X : FuncUnit;
 def ALU_Y : FuncUnit;
 def ALU_Z : FuncUnit;
 def ALU_W : FuncUnit;
 def TRANS : FuncUnit;
 // Itinerary classes used by R600_EG_Itin below to group instructions by the
 // slots they may occupy.
 def AnyALU : InstrItinClass;
 def VecALU : InstrItinClass;
 def TransALU : InstrItinClass;
 // Itineraries for R600/Evergreen.  Each itinerary class takes one cycle on
 // any one of the listed functional units:
 //   AnyALU   - any vector slot or the TRANS slot
 //   VecALU   - any of the four vector slots (X, Y, Z, W)
 //   TransALU - the TRANS slot only
 //   NullALU  - the ALU_NULL unit
 // ALU_NULL and NullALU are not defined in this file.
 def R600_EG_Itin : ProcessorItineraries <
  [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
  [],
  [
    InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
    InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
    InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
  ]
 >;
--- a/lib/Target/AMDGPU/SIAssignInterpRegs.cpp
+++ b/lib/Target/AMDGPU/SIAssignInterpRegs.cpp
@ -0,0 +1,117 @@
 //===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This pass maps the pseudo interpolation registers to the correct physical
 // registers.  Prior to executing a fragment shader, the GPU loads interpolation
 // parameters into physical registers.  The specific physical register that each
 // interpolation parameter ends up in depends on the type of the interpolation
 // parameter as well as how many interpolation parameters are used by the
 // shader.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "AMDGPUUtil.h"
 #include "AMDIL.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 namespace {
 /// SIAssignInterpRegsPass - Machine function pass that maps the pseudo
 /// interpolation registers to physical registers (see the file header).
 class SIAssignInterpRegsPass : public MachineFunctionPass {
 private:
  static char ID;
  /// Target machine; its instruction info is used when adding live-ins.
  TargetMachine &TM;
 public:
  SIAssignInterpRegsPass(TargetMachine &tm) :
    MachineFunctionPass(ID), TM(tm) { }
  virtual bool runOnMachineFunction(MachineFunction &MF);
  // NOTE(review): the name string spells "intrpolation"; it is left as-is
  // here because it is runtime-visible text.
  const char *getPassName() const { return "SI Assign intrpolation registers"; }
 };
 } // End anonymous namespace
 // Storage for the static pass ID member.
 char SIAssignInterpRegsPass::ID = 0;
 // Number of entries in the interpolation table built by
 // SIAssignInterpRegsPass::runOnMachineFunction.
 #define INTERP_VALUES 16
 /// interp_info - One interpolation value: the pseudo registers that carry it
 /// and whether the current function uses it.
 struct interp_info {
  bool enabled;        // set when the value is found to be used
  unsigned regs[3];    // pseudo registers of this value (first reg_count valid)
  unsigned reg_count;  // number of valid entries in regs
 };
 /// Factory for the SI interpolation register assignment pass.  The returned
 /// pass is heap-allocated.
 FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
  return new SIAssignInterpRegsPass(tm);
 }
 /// Assign VGPRs to the interpolation values used by \p MF.
 ///
 /// The pass first determines which of the INTERP_VALUES interpolation values
 /// are used, then, for each used value in table order, sets the matching bit
 /// in spi_ps_input_addr and replaces every pseudo register of the value with
 /// a fresh virtual register that is made live-in from the next VReg_32
 /// register.
 ///
 /// \returns true when at least one interpolation value was rewritten.
 bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
 {
  struct interp_info InterpUse[INTERP_VALUES] = {
    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
    {false, {AMDGPU::POS_X_FLOAT}, 1},
    {false, {AMDGPU::POS_Y_FLOAT}, 1},
    {false, {AMDGPU::POS_Z_FLOAT}, 1},
    {false, {AMDGPU::POS_W_FLOAT}, 1},
    {false, {AMDGPU::FRONT_FACE}, 1},
    {false, {AMDGPU::ANCILLARY}, 1},
    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
    {false, {AMDGPU::POS_FIXED_PT}, 1}
  };
  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  /* First pass, mark the interpolation values that are used.  A value is
   * used as soon as ANY of its registers has a use, so the flag is only ever
   * set here, never cleared by a later unused register of the same value. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                               reg_idx++) {
      if (!MRI.use_empty(InterpUse[interp_idx].regs[reg_idx])) {
        InterpUse[interp_idx].enabled = true;
        break;
      }
    }
  }
  unsigned used_vgprs = 0;
  bool changed = false;
  /* Second pass, replace with VGPRs. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    if (!InterpUse[interp_idx].enabled) {
      continue;
    }
    changed = true;
    MFI->spi_ps_input_addr |= (1 << interp_idx);
    /* Every register of an enabled value consumes one VGPR, used or not. */
    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                  reg_idx++, used_vgprs++) {
      unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs);
      unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
      AMDGPU::utilAddLiveIn(&MF, MRI, TM.getInstrInfo(), new_reg, virt_reg);
    }
  }
  return changed;
 }
--- a/lib/Target/AMDGPU/SICodeEmitter.cpp
+++ b/lib/Target/AMDGPU/SICodeEmitter.cpp
@ -0,0 +1,321 @@
 //===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // The SI code emitter produces machine code that can be executed directly on
 // the GPU device.
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "AMDGPUUtil.h"
 #include "AMDILCodeEmitter.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetMachine.h"
 #include <stdio.h>
 #define LITERAL_REG 255
 // Mask with only bit (9 * src_idx - 1) set; src_idx is the 1-based source
 // operand index.  VOPPostEncode() ORs this into an encoding to flag a source
 // operand as a VGPR.  The argument is parenthesised so that expressions such
 // as VGPR_BIT(i + 1) expand correctly.
 #define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
 using namespace llvm;
 namespace {
  /// SICodeEmitter - Machine function pass that writes the binary encoding of
  /// every instruction in a function to the output stream it was created with.
  class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
  private:
    /// Pass ID handed to the MachineFunctionPass constructor.
    static char ID;
    /// Stream that receives the emitted bytes.
    formatted_raw_ostream &_OS;
    /// Target machine of the function being emitted; NULL until
    /// runOnMachineFunction() assigns it.
    const TargetMachine *TM;
    /// emitState - Write the SGPR count, VGPR count and spi_ps_input_addr
    /// words that precede the instruction stream.
    void emitState(MachineFunction & MF);
    /// emitInstr - Encode a single instruction and write it to the stream.
    void emitInstr(MachineInstr &MI);
    /// outputBytes - Write the low \p bytes bytes of \p value, least
    /// significant byte first.
    void outputBytes(uint64_t value, unsigned bytes);
    /// GPRAlign - Return the hardware encoding of register operand \p OpNo
    /// shifted right by \p shift bits.
    unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
                                                                      const;
  public:
    SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
        _OS(OS), TM(NULL) { }
    const char *getPassName() const { return "SI Code Emitter"; }
    /// runOnMachineFunction - Emit the state words, every instruction except
    /// KILL and RETURN, and a final S_ENDPGM.  Always returns false.
    bool runOnMachineFunction(MachineFunction &MF);
    /// getMachineOpValue - Return the encoding for MO
    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
                                       const MachineOperand &MO) const;
    /// GPR4AlignEncode - Encoding for when 4 consecutive registers are used
    virtual unsigned GPR4AlignEncode(const MachineInstr  &MI, unsigned OpNo)
                                                                      const;
    /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
    virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;
    /// i32LiteralEncode - Encode an i32 literal that is used as an operand
    /// for an instruction in place of a register.
    virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;
    /// SMRDmemriEncode - Encoding for SMRD indexed loads
    virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
                                                                     const;
    /// VOPPostEncode - Post-Encoder method for VOP instructions
    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
                                   uint64_t Value) const;
  };
 }
 // Storage for the pass ID that the SICodeEmitter constructor passes to
 // MachineFunctionPass.
 char SICodeEmitter::ID = 0;
 /// createSICodeEmitterPass - Return a newly allocated SICodeEmitter that
 /// writes to \p OS.  The returned pass keeps a reference to \p OS.
 FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
  return new SICodeEmitter(OS);
 }
 /// emitState - Scan every register operand in \p MF to find the highest SGPR
 /// and VGPR index referenced, then write three 32-bit words to the output
 /// stream: the SGPR count, the VGPR count and the spi_ps_input_addr value
 /// stored in SIMachineFunctionInfo.  Two extra SGPRs are counted when VCC is
 /// referenced anywhere in the function.
 void SICodeEmitter::emitState(MachineFunction & MF)
 {
  unsigned maxSGPR = 0;
  unsigned maxVGPR = 0;
  bool VCCUsed = false;
  const SIRegisterInfo * RI =
                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        MachineOperand & MO = MI.getOperand(op_idx);
        unsigned maxUsed;
        // Number of 32-bit registers covered by the operand's register class.
        unsigned width = 0;
        bool isSGPR = false;
        unsigned reg;
        unsigned hwReg;
        if (!MO.isReg()) {
          continue;
        }
        reg = MO.getReg();
        // VCC is accounted for separately once the scan is finished.
        if (reg == AMDGPU::VCC) {
          VCCUsed = true;
          continue;
        }
        if (AMDGPU::SReg_32RegClass.contains(reg)) {
          isSGPR = true;
          width = 1;
        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
          isSGPR = false;
          width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
          isSGPR = true;
          width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
          isSGPR = false;
          width = 2;
        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
          isSGPR = true;
          width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
          isSGPR = false;
          width = 4;
        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
          isSGPR = true;
          width = 8;
        } else {
          // The negation has to be outside the string literal: a string
          // literal on its own is a non-null pointer, so asserting on it
          // can never fail.
          assert(!"Unknown register class");
          // Without assertions, skip the operand.  With width == 0 the
          // expression below would wrap around to UINT_MAX and corrupt the
          // VGPR count.
          continue;
        }
        hwReg = RI->getEncodingValue(reg);
        // NOTE(review): SIGenRegisterInfo.pl appears to use the index of the
        // first 32-bit sub-register as the encoding of wide registers, in
        // which case this over-counts for width > 1 -- confirm.
        maxUsed = ((hwReg + 1) * width) - 1;
        if (isSGPR) {
          maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
        } else {
          maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
        }
      }
    }
  }
  if (VCCUsed) {
    maxSGPR += 2;
  }
  // The maxima are indices, so add one to turn them into counts.
  outputBytes(maxSGPR + 1, 4);
  outputBytes(maxVGPR + 1, 4);
  outputBytes(MFI->spi_ps_input_addr, 4);
 }
 /// runOnMachineFunction - Emit the binary form of \p MF: the state words
 /// first, then each instruction other than KILL and RETURN, then an
 /// S_ENDPGM.  The function itself is not modified, so false is returned.
 bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
 {
  TM = &MF.getTarget();
  // Optionally dump the function before encoding it.
  if (TM->getSubtarget<AMDILSubtarget>().dumpCode())
    MF.dump();
  emitState(MF);
  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
                                          Block != BlockEnd; ++Block) {
    for (MachineBasicBlock::iterator Inst = Block->begin(),
                           InstEnd = Block->end(); Inst != InstEnd; ++Inst) {
      const unsigned Opcode = Inst->getOpcode();
      // KILL and RETURN are never written to the stream.
      if (Opcode == AMDGPU::KILL || Opcode == AMDGPU::RETURN)
        continue;
      emitInstr(*Inst);
    }
  }
  // Terminate the program with an S_ENDPGM built only for encoding.
  MachineInstr *EndPgm = BuildMI(MF, DebugLoc(),
                                 TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
  emitInstr(*EndPgm);
  return false;
 }
 /// emitInstr - Encode \p MI and write the encoding to the output stream.
 /// Aborts after dumping the instruction when the low 32 bits of the encoding
 /// are all ones, which this emitter treats as "unsupported".
 void SICodeEmitter::emitInstr(MachineInstr &MI)
 {
  const uint64_t Encoded = getBinaryCodeForInstr(MI);
  if ((Encoded & 0xffffffff) == 0xffffffff) {
    fprintf(stderr, "Unsupported Instruction: \n");
    MI.dump();
    abort();
  }
  const SIInstrInfo *InstrInfo =
                     static_cast<const SIInstrInfo*>(TM->getInstrInfo());
  outputBytes(Encoded, InstrInfo->getEncodingBytes(MI));
 }
 uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                          const MachineOperand &MO) const
 {
  const SIRegisterInfo * RI =
                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  switch(MO.getType()) {
  case MachineOperand::MO_Register:
    return RI->getEncodingValue(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MO.getImm();
  case MachineOperand::MO_FPImmediate:
    // XXX: Not all instructions can use inline literals
    // XXX: We should make sure this is a 32-bit constant
    return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
  default:
    llvm_unreachable("Encoding of this operand type is not supported yet.");
    break;
  }
 }
 unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
    unsigned shift) const
 {
  const SIRegisterInfo * RI =
                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  unsigned regCode = RI->getEncodingValue(MI.getOperand(OpNo).getReg());
  return regCode >> shift;
 }
 /// GPR4AlignEncode - Encoding of operand \p OpNo when it names four
 /// consecutive registers: the register encoding divided by four.
 unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
    unsigned OpNo) const
 {
  const unsigned Log2Regs = 2;
  return GPRAlign(MI, OpNo, Log2Regs);
 }
 /// GPR2AlignEncode - Encoding of operand \p OpNo when it names two
 /// consecutive registers: the register encoding divided by two.
 unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
    unsigned OpNo) const
 {
  const unsigned Log2Regs = 1;
  return GPRAlign(MI, OpNo, Log2Regs);
 }
 /// i32LiteralEncode - Encode the immediate in operand \p OpNo of \p MI as a
 /// literal constant: LITERAL_REG in the low bits and the immediate in bits
 /// [63:32].
 uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
    unsigned OpNo) const
 {
  // getImm() yields a signed value; shifting a negative signed value left is
  // undefined behaviour, so do the shift on the unsigned representation.
  const uint64_t Literal =
      static_cast<uint64_t>(MI.getOperand(OpNo).getImm());
  return LITERAL_REG | (Literal << 32);
 }
 #define SMRD_OFFSET_MASK 0xff
 #define SMRD_IMM_SHIFT 8
 #define SMRD_SBASE_MASK 0x3f
 #define SMRD_SBASE_SHIFT 9
 /// SMRDmemriEncode - This function is responsibe for encoding the offset
 /// and the base ptr for SMRD instructions it should return a bit string in
 /// this format:
 ///
 /// OFFSET = bits{7-0}
 /// IMM    = bits{8}
 /// SBASE  = bits{14-9}
 ///
 uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
    unsigned OpNo) const
 {
  uint32_t encoding;
  const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
  //XXX: Use this function for SMRD loads with register offsets
  assert(OffsetOp.isImm());
  encoding =
      (getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
    | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
    | ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
    ;
  return encoding;
 }
 /// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
 /// XXX: It would be nice if we could handle this without a PostEncode function.
 uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
    uint64_t Value) const
 {
  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
  unsigned encodingType = SII->getEncodingType(MI);
  unsigned numSrcOps;
  unsigned vgprBitOffset;
  if (encodingType == SIInstrEncodingType::VOP3) {
    numSrcOps = 3;
    vgprBitOffset = 32;
  } else {
    numSrcOps = 1;
    vgprBitOffset = 0;
  }
  // Add one to skip over the destination reg operand.
  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
    if (!MI.getOperand(opIdx).isReg()) {
      continue;
    }
    unsigned reg = MI.getOperand(opIdx).getReg();
    if (AMDGPU::VReg_32RegClass.contains(reg)
        || AMDGPU::VReg_64RegClass.contains(reg)) {
      Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
    }
  }
  return Value;
 }
 /// outputBytes - Write the lowest \p bytes bytes of \p value to the output
 /// stream, least significant byte first.
 void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
 {
  unsigned Shift = 0;
  for (unsigned Written = 0; Written < bytes; ++Written, Shift += 8) {
    const uint8_t Byte = (uint8_t) ((value >> Shift) & 0xff);
    _OS.write(Byte);
  }
 }
--- a/lib/Target/AMDGPU/SIGenRegisterInfo.pl
+++ b/lib/Target/AMDGPU/SIGenRegisterInfo.pl
@ -0,0 +1,224 @@
 #===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
 #
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 #
 #===------------------------------------------------------------------------===#
 #
 # This perl script prints to stdout .td code to be used as SIRegisterInfo.td
 # it also generates a file called SIHwRegInfo.include, which contains helper
 # functions for determining the hw encoding of registers.
 #
 #===------------------------------------------------------------------------===#
 use strict;
 use warnings;
 # Number of 32-bit SGPR and VGPR registers to generate definitions for.
 my $SGPR_COUNT = 104;
 my $VGPR_COUNT = 256;
 # Highest register index of each kind; interpolated into the "sequence"
 # ranges of the SReg_32 and VReg_32 register classes further down.
 my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
 my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
 # Optional first command-line argument.
 # NOTE(review): not referenced anywhere in the visible part of this script.
 my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
 # Print the fixed part of the .td output: sub-register indices, the SIReg and
 # wide-register helper classes, the special registers and the interpolation
 # registers.  Everything up to the STRING terminator is emitted verbatim.
 print <<STRING;
 let Namespace = "AMDGPU" in {
  def low : SubRegIndex;
  def high : SubRegIndex;
  def sub0 : SubRegIndex;
  def sub1 : SubRegIndex;
  def sub2 : SubRegIndex;
  def sub3 : SubRegIndex;
  def sub4 : SubRegIndex;
  def sub5 : SubRegIndex;
  def sub6 : SubRegIndex;
  def sub7 : SubRegIndex;
 }
 class SIReg <string n, bits<16> encoding = 0> : Register<n> {
  let Namespace = "AMDGPU";
  let HWEncoding = encoding;
 }
 class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [low, high];
  let HWEncoding = encoding;
 }
 class SI_128 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
  let HWEncoding = encoding;
 }
 class SI_256 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
  let HWEncoding = encoding;
 }
 class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
 class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
 class SGPR_64 <bits<16> num, string name, list<Register> subregs> :
    SI_64 <name, subregs, num>;
 class VGPR_64 <bits<16> num, string name, list<Register> subregs> :
    SI_64 <name, subregs, num>;
 class SGPR_128 <bits<16> num, string name, list<Register> subregs> :
    SI_128 <name, subregs, num>;
 class VGPR_128 <bits<16> num, string name, list<Register> subregs> :
    SI_128 <name, subregs, num>;
 class SGPR_256 <bits<16> num, string name, list<Register> subregs> :
    SI_256 <name, subregs, num>;
 def VCC : SIReg<"VCC">;
 def SCC : SIReg<"SCC">;
 def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
 def M0 : SIReg <"M0", 124>;
 //Interpolation registers
 def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
 def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
 def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
 def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
 def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
 def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
 def PERSP_I_W : SIReg <"PERSP_I_W">;
 def PERSP_J_W : SIReg <"PERSP_J_W">;
 def PERSP_1_W : SIReg <"PERSP_1_W">;
 def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
 def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
 def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
 def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
 def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
 def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
 def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
 def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
 def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
 def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
 def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
 def FRONT_FACE : SIReg <"FRONT_FACE">;
 def ANCILLARY : SIReg <"ANCILLARY">;
 def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
 def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
 STRING
 # 32-bit registers: print one def per SGPR and per VGPR, and keep the
 # generated names in @SGPR / @VGPR.
 my @SGPR;
 foreach my $idx (0 .. $SGPR_COUNT - 1) {
  my $name = "SGPR$idx";
  print "def $name : SGPR_32 <$idx, \"$name\">;\n";
  push(@SGPR, $name);
 }
 my @VGPR;
 foreach my $idx (0 .. $VGPR_COUNT - 1) {
  my $name = "VGPR$idx";
  print "def $name : VGPR_32 <$idx, \"$name\">;\n";
  push(@VGPR, $name);
 }
 # Print the 32-bit register classes.  $SGPR_MAX_IDX and $VGPR_MAX_IDX are
 # interpolated into the "sequence" ranges; the rest is emitted verbatim.
 # VReg_32 also contains the interpolation registers, SReg_32 also contains
 # SREG_LIT_0 and M0, and AllReg_32 is the union of the two classes.
 print <<STRING;
 def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
    (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX),  SREG_LIT_0, M0)
 >;
 def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
    (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
    PERSP_SAMPLE_I, PERSP_SAMPLE_J,
    PERSP_CENTER_I, PERSP_CENTER_J,
    PERSP_CENTROID_I, PERSP_CENTROID_J,
    PERSP_I_W, PERSP_J_W, PERSP_1_W,
    LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
    LINEAR_CENTER_I, LINEAR_CENTER_J,
    LINEAR_CENTROID_I, LINEAR_CENTROID_J,
    LINE_STIPPLE_TEX_COORD,
    POS_X_FLOAT,
    POS_Y_FLOAT,
    POS_Z_FLOAT,
    POS_W_FLOAT,
    FRONT_FACE,
    ANCILLARY,
    SAMPLE_COVERAGE,
    POS_FIXED_PT
    )
 >;
 def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
    (add VReg_32, SReg_32) 
 >;
 def CCReg : RegisterClass<"AMDGPU", [f32], 32, (add VCC, SCC)>;
 STRING
 # Sub-register index names for each register width.  They are passed to the
 # print_*_class helpers below.
 my @subregs_64 = ('low', 'high');
 my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
 my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
 # Print the 64/128/256-bit register definitions and their classes.  Each
 # call returns the names of the registers it defined.
 my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
 my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
 my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
 my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
 my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
 # AllReg_64 contains every 64-bit SGPR and VGPR register defined above.
 my $sgpr64_list = join(',', @SGPR64);
 my $vgpr64_list = join(',', @VGPR64);
 print <<STRING;
 def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
    (add $sgpr64_list, $vgpr64_list)
 >;
 STRING
 # print_sgpr_class($reg_width, $sub_reg_ref, @types)
 # Print the SGPR registers of the given bit width together with their
 # SReg_<width> register class, and return the register names.
 sub print_sgpr_class {
  my ($width, $subreg_names, @value_types) = @_;
  my @generated = print_reg_class('SReg', 'SGPR', $width, $SGPR_COUNT,
                                  $subreg_names, @value_types);
  return @generated;
 }
 # print_vgpr_class($reg_width, $sub_reg_ref, @types)
 # Print the VGPR registers of the given bit width together with their
 # VReg_<width> register class, and return the register names.
 sub print_vgpr_class {
  my ($width, $subreg_names, @value_types) = @_;
  my @generated = print_reg_class('VReg', 'VGPR', $width, $VGPR_COUNT,
                                  $subreg_names, @value_types);
  return @generated;
 }
 # print_reg_class($class_prefix, $reg_prefix, $reg_width, $reg_count,
 #                 $sub_reg_ref, @types)
 # Print one def for every $reg_width-bit register that can be built from
 # consecutive 32-bit "$reg_prefix<N>" registers, then print the register
 # class "<class_prefix>_<reg_width>" containing all of them.  Returns the
 # list of register names; names whose first index is a multiple of 10 carry
 # a trailing newline.  $sub_reg_ref is accepted but not used.
 sub print_reg_class {
  my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
  # Number of 32-bit registers covered by one register of this width.
  my $dwords_per_reg = $reg_width / 32;
  my @class_members;
  my $first = 0;
  while ($first < $reg_count) {
    my $name = "${reg_prefix}${first}_${reg_width}";
    # Names of the consecutive 32-bit registers that make up this register.
    my @parts;
    my $offset = 0;
    while ($offset < $dwords_per_reg) {
      push(@parts, $reg_prefix . ($first + $offset));
      $offset++;
    }
    my $part_list = join(',', @parts);
    print "def $name : ${reg_prefix}_$reg_width <$first, \"$name\", [ $part_list]>;\n";
    # The trailing newline breaks up the long (add ...) list printed below.
    push(@class_members, ($first % 10 == 0) ? "$name\n" : $name);
    $first += $dwords_per_reg;
  }
  my $type_list = join(', ', @types);
  my $member_list = join(', ', @class_members);
  print "def ${class_prefix}_$reg_width : RegisterClass<\"AMDGPU\", [$type_list], $reg_width,\n  (add $member_list)\n>{\n";
  print "}\n";
  return @class_members;
 }
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -0,0 +1,195 @@
 //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file is
 // mostly EmitInstrWithCustomInserter().
 //
 //===----------------------------------------------------------------------===//
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 /// Construct the SI lowering object: cache the SIInstrInfo of \p TM, assign
 /// a register class to each supported value type and mark 32-bit and 64-bit
 /// integer ADD as legal.
 SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
 {
  // v4f32, f32, i32 and i64 values use the VReg_* classes ...
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
  // ... while v4i32 and v8i32 values use the SReg_* classes.
  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
  computeRegisterProperties();
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::ADD, MVT::i32, Legal);
 }
 MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
 {
  const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
    AppendS_WAITCNT(MI, *BB, llvm::next(I));
    return BB;
  }
  switch (MI->getOpcode()) {
  default:
    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_SI:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           // VSRC1-2 are unused, but we still need to fill all the
           // operand slots, so we just reuse the VSRC0 operand
           .addOperand(MI->getOperand(1))
           .addOperand(MI->getOperand(1))
           .addImm(0) // ABS
           .addImm(1) // CLAMP
           .addImm(0) // OMOD
           .addImm(0); // NEG
    MI->eraseFromParent();
    break;
  case AMDGPU::FABS_SI:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
                 .addOperand(MI->getOperand(0))
                 .addOperand(MI->getOperand(1))
                 // VSRC1-2 are unused, but we still need to fill all the
                 // operand slots, so we just reuse the VSRC0 operand
                 .addOperand(MI->getOperand(1))
                 .addOperand(MI->getOperand(1))
                 .addImm(1) // ABS
                 .addImm(0) // CLAMP
                 .addImm(0) // OMOD
                 .addImm(0); // NEG
    MI->eraseFromParent();
    break;
  case AMDGPU::SI_INTERP:
    LowerSI_INTERP(MI, *BB, I, MRI);
    break;
  case AMDGPU::SI_INTERP_CONST:
    LowerSI_INTERP_CONST(MI, *BB, I);
    break;
  case AMDGPU::SI_V_CNDLT:
    LowerSI_V_CNDLT(MI, *BB, I, MRI);
    break;
  case AMDGPU::USE_SGPR_32:
  case AMDGPU::USE_SGPR_64:
    lowerUSE_SGPR(MI, BB->getParent(), MRI);
    MI->eraseFromParent();
    break;
  case AMDGPU::VS_LOAD_BUFFER_INDEX:
    addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0);
    MI->eraseFromParent();
    break;
  }
  return BB;
 }
 /// AppendS_WAITCNT - Insert an S_WAITCNT with an immediate operand of 0
 /// before position \p I of \p BB.  \p MI is not used.
 void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I) const
 {
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
 }
 /// LowerSI_INTERP - Replace the SI_INTERP pseudo \p MI with three
 /// instructions inserted before \p I: an S_MOV_B32 that writes the params
 /// operand to M0, a V_INTERP_P1_F32 on the i register, and a V_INTERP_P2_F32
 /// on the j register that produces the final result.  \p MI is erased.
 ///
 /// Operands of \p MI: 0 = dst, 1 = i, 2 = j, 3 = attr_chan, 4 = attr,
 /// 5 = params.
 void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
 {
  // Holds the result of the first interpolation step.
  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
  const MachineOperand &dst = MI->getOperand(0);
  const MachineOperand &iReg = MI->getOperand(1);
  const MachineOperand &jReg = MI->getOperand(2);
  const MachineOperand &attr_chan = MI->getOperand(3);
  const MachineOperand &attr = MI->getOperand(4);
  const MachineOperand &params = MI->getOperand(5);
  // M0 is the destination of the move, so hand it to BuildMI as the
  // destination register; addReg() would add it as a use operand.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
          .addOperand(params);
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
          .addOperand(iReg)
          .addOperand(attr_chan)
          .addOperand(attr);
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
          .addOperand(dst)
          .addReg(tmp)
          .addOperand(jReg)
          .addOperand(attr_chan)
          .addOperand(attr);
  MI->eraseFromParent();
 }
 /// LowerSI_INTERP_CONST - Replace the SI_INTERP_CONST pseudo \p MI with an
 /// S_MOV_B32 that writes the params operand to M0 followed by a
 /// V_INTERP_MOV_F32, both inserted before \p I.  \p MI is erased.
 ///
 /// Operands of \p MI: 0 = dst, 1 = attr_chan, 2 = attr, 3 = params.
 void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
    MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
 {
  const MachineOperand &dst = MI->getOperand(0);
  const MachineOperand &attr_chan = MI->getOperand(1);
  const MachineOperand &attr = MI->getOperand(2);
  const MachineOperand &params = MI->getOperand(3);
  // M0 is the destination of the move, so hand it to BuildMI as the
  // destination register; addReg() would add it as a use operand.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
          .addOperand(params);
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
          .addOperand(dst)
          .addOperand(attr_chan)
          .addOperand(attr);
  MI->eraseFromParent();
 }
 /// LowerSI_V_CNDLT - Replace the SI_V_CNDLT pseudo \p MI with a
 /// V_CMP_LT_F32_e32 of operand 1 against SREG_LIT_0 followed by a
 /// V_CNDMASK_B32 that writes operand 0 from operands 2 and 3.  Both are
 /// inserted before \p I and \p MI is erased.  \p MRI is not used.
 void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
 {
  const MachineOperand &Result = MI->getOperand(0);
  const MachineOperand &Compared = MI->getOperand(1);
  const MachineOperand &FirstChoice = MI->getOperand(2);
  const MachineOperand &SecondChoice = MI->getOperand(3);
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32))
          .addOperand(Compared)
          .addReg(AMDGPU::SREG_LIT_0);
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
          .addOperand(Result)
          .addOperand(FirstChoice)
          .addOperand(SecondChoice);
  MI->eraseFromParent();
 }
 /// lowerUSE_SGPR - Turn a USE_SGPR pseudo into a function live-in.  Operand
 /// 1 of \p MI is a register index counted in 32-bit units; it is converted
 /// to an index into the register class of the destination (operand 0) and
 /// the register found there is passed to addLiveIn().  The caller erases
 /// \p MI.
 void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
    MachineFunction * MF, MachineRegisterInfo & MRI) const
 {
  const unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
  // Size of one register of the destination class in 32-bit units.
  const unsigned DwordsPerReg = DstRC->getSize() / 4;
  const int64_t DwordIndex = MI->getOperand(1).getImm();
  assert(DwordIndex % DwordsPerReg == 0 && "USER_SGPR not properly aligned");
  const int64_t ClassIndex = DwordIndex / DwordsPerReg;
  const unsigned PhysReg = DstRC->getRegister(ClassIndex);
  addLiveIn(MI, MF, MRI, getTargetMachine().getInstrInfo(), PhysReg);
 }
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@ -0,0 +1,48 @@
 //===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // SI DAG Lowering interface definition
 //
 //===----------------------------------------------------------------------===//
 #ifndef SIISELLOWERING_H
 #define SIISELLOWERING_H
 #include "AMDGPUISelLowering.h"
 #include "SIInstrInfo.h"
 namespace llvm {
 /// SITargetLowering - SI-specific lowering.  It expands SI pseudo
 /// instructions in EmitInstrWithCustomInserter() and defers everything else
 /// to AMDGPUTargetLowering.
 class SITargetLowering : public AMDGPUTargetLowering
 {
  /// Instruction info of the target machine, cached by the constructor.
  const SIInstrInfo * TII;
  /// AppendS_WAITCNT - Memory reads and writes are synchronized using the
  /// S_WAITCNT instruction.  This function takes the most conservative
  /// approach and inserts an S_WAITCNT instruction after every read and
  /// write.
  void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I) const;
  /// LowerSI_INTERP - Expand SI_INTERP into S_MOV_B32, V_INTERP_P1_F32 and
  /// V_INTERP_P2_F32.
  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  /// LowerSI_INTERP_CONST - Expand SI_INTERP_CONST into S_MOV_B32 and
  /// V_INTERP_MOV_F32.
  void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I) const;
  /// LowerSI_V_CNDLT - Expand SI_V_CNDLT into V_CMP_LT_F32_e32 and
  /// V_CNDMASK_B32.
  void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  /// lowerUSE_SGPR - Make the register selected by a USE_SGPR pseudo a
  /// live-in of the function.
  void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF,
                     MachineRegisterInfo & MRI) const;
 public:
  SITargetLowering(TargetMachine &tm);
  /// EmitInstrWithCustomInserter - Expand the pseudo instruction \p MI in
  /// \p BB and return the block in which the expansion ended.
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
                                              MachineBasicBlock * BB) const;
 };
 } // End namespace llvm
 #endif //SIISELLOWERING_H
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@ -0,0 +1,128 @@
 //===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // SI Instruction format definitions.
 //
 // Instructions with _32 take 32-bit operands.
 // Instructions with _64 take 64-bit operands.
 //
 // VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
 // encoding is the standard encoding, but instruction that make use of
 // any of the instruction modifiers must use the 64-bit encoding.
 //
 // Instructions with _e32 use the 32-bit encoding.
 // Instructions with _e64 use the 64-bit encoding.
 //
 //===----------------------------------------------------------------------===//
 // VOP3 instructions on 32-bit (VOP3_32) and 64-bit (VOP3_64) values: a VReg
 // destination, three AllReg sources and four i32 immediates ($src3-$src6).
 // NOTE(review): the V_MOV_B32_e64 expansion in SIISelLowering.cpp fills the
 // four immediates as ABS, CLAMP, OMOD, NEG -- confirm this holds for every
 // VOP3 instruction.
 class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
 class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
 // SOP1 instructions: one SReg source and an SReg destination of the same
 // width (32 or 64 bits).
 class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
 class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
 // SOP2 instructions: two SReg sources and an SReg destination of the same
 // width (32 or 64 bits).
 class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
 class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
 // VOP1_Helper - VOP1 instruction with destination class vrc and a single
 // source of class arc.
 class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> : 
  VOP1 <
    op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
  >;
 // VOP1_32 / VOP1_64 - Define both encodings of a VOP1 instruction.  The _e32
 // variant carries the selection pattern.  The _e64 variant is a VOP3
 // instruction with an empty pattern whose 9-bit opcode is the two bits 1,1
 // followed by the low seven bits of op.
 multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
 }
 multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
 }
 // VOP2_Helper - VOP2 instruction with destination class vrc, a first source
 // of class arc and a second source of class vrc.
 class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP2 <
    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
  >;
 // VOP2_32 / VOP2_64 - Define both encodings of a VOP2 instruction.  The _e32
 // variant carries the selection pattern.  The _e64 variant is a VOP3
 // instruction with an empty pattern whose 9-bit opcode is the three bits
 // 1,0,0 followed by the six bits of op.
 multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
  def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
 }
 multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
  def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
 }
 // SOPK instructions: a 16-bit immediate source and a 32-bit or 64-bit SReg
 // destination.
 class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
 class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
 // VOPC_Helper - VOPC instruction with no explicit outputs, a first source of
 // class arc and a second source of class vrc.
 class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                 string opName, list<dag> pattern> :
  VOPC <
    op, (outs), (ins arc:$src0, vrc:$src1), opName, pattern
  >;
 // VOPC_32 / VOPC_64 - Define both encodings of a VOPC instruction.  The _e32
 // variant carries the selection pattern.  The _e64 variant is a VOP3
 // instruction with an empty pattern whose 9-bit opcode is a 0 bit followed
 // by the eight bits of op.
 multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
 }
 multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
 }
 // SOPC instructions: two SReg sources of the same width (32 or 64 bits) and
 // a CCReg destination.
 class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs CCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
 class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs CCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@ -0,0 +1,103 @@
 //===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // SI Implementation of TargetInstrInfo.
 //
 //===----------------------------------------------------------------------===//
 #include "SIInstrInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include <stdio.h>
 using namespace llvm;
 // Construct the SI instruction info: forward the target machine to the
 // AMDGPUInstrInfo base and build the RI member from the same target machine
 // plus a reference to this object.
 SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
    : AMDGPUInstrInfo(tm), RI(tm, *this) {}
 // Accessor for the register info object (RI) held by this instruction info.
 const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { return RI; }
 // Insert a register-to-register copy before MI in MBB.
 //
 // The copy is emitted as a single V_MOV_B32_e32 that defines DestReg and
 // reads SrcReg; SrcReg is flagged as killed when KillSrc is true.
 //
 // NOTE(review): V_MOV_B32_e32 is used unconditionally, whatever registers are
 // passed in -- confirm that copies needing a different move opcode are
 // handled elsewhere.
 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI, DebugLoc DL,
                               unsigned DestReg, unsigned SrcReg,
                               bool KillSrc) const {
  const MCInstrDesc &MovDesc = get(AMDGPU::V_MOV_B32_e32);
  const unsigned SrcFlags = getKillRegState(KillSrc);
  BuildMI(MBB, MI, DL, MovDesc, DestReg).addReg(SrcReg, SrcFlags);
 }
 unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const
 {
  return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
 }
 unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
 {
  /* Instructions with literal constants are expanded to 64-bits, and
   * the constant is stored in bits [63:32] */
  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
    if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) {
      return 8;
    }
  }
  /* This instruction always has a literal */
  if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
    return 8;
  }
  unsigned encoding_type = getEncodingType(MI);
  switch (encoding_type) {
    case SIInstrEncodingType::EXP:
    case SIInstrEncodingType::LDS:
    case SIInstrEncodingType::MUBUF:
    case SIInstrEncodingType::MTBUF:
    case SIInstrEncodingType::MIMG:
    case SIInstrEncodingType::VOP3:
      return 8;
    default:
      return 4;
  }
 }
 // Create (but do not insert) a V_MOV_IMM_I32 instruction in MF that defines
 // DstReg and carries Imm as its immediate operand.  The instruction is
 // created with an empty DebugLoc and returned to the caller.
 MachineInstr *SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                           int64_t Imm) const {
  const MCInstrDesc &MovImmDesc = get(AMDGPU::V_MOV_IMM_I32);
  MachineInstr *MovImm = MF->CreateMachineInstr(MovImmDesc, DebugLoc());
  /* Operand order matters: destination register first, then the immediate. */
  MachineInstrBuilder(MovImm).addReg(DstReg, RegState::Define).addImm(Imm);
  return MovImm;
 }
 // Return true when Opcode is one of the move opcodes listed below, and false
 // for every other opcode.
 bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_IMM_F32:
  case AMDGPU::V_MOV_IMM_I32:
  case AMDGPU::S_MOV_IMM_I32:
    return true;
  default:
    return false;
  }
 }
--- a/Show More
+++ b/Show More