diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 8a3ab46ad22..417356d800f 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -1648,57 +1648,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
   return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
 }
 
-/// \brief Make sure that we don't exeed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                                       unsigned RegClass,
-                                       bool &ScalarSlotUsed) const {
-
-  if (!isVSrc(RegClass))
-    return;
-
-  // First map the operands register class to a destination class
-  switch (RegClass) {
-    case AMDGPU::VSrc_32RegClassID:
-    case AMDGPU::VCSrc_32RegClassID:
-      RegClass = AMDGPU::VReg_32RegClassID;
-      break;
-    case AMDGPU::VSrc_64RegClassID:
-    case AMDGPU::VCSrc_64RegClassID:
-      RegClass = AMDGPU::VReg_64RegClassID;
-      break;
-   default:
-    llvm_unreachable("Unknown vsrc reg class");
-  }
-
-  // Nothing to do if they fit naturally
-  if (fitsRegClass(DAG, Operand, RegClass))
-    return;
-
-  // If the scalar slot isn't used yet use it now
-  if (!ScalarSlotUsed) {
-    ScalarSlotUsed = true;
-    return;
-  }
-
-  // This is a conservative aproach. It is possible that we can't determine the
-  // correct register class and copy too often, but better safe than sorry.
-
-  SDNode *Node;
-  // We can't use COPY_TO_REGCLASS with FrameIndex arguments.
-  if (isa<FrameIndexSDNode>(Operand) ||
-      isa<GlobalAddressSDNode>(Operand)) {
-    unsigned Opcode = Operand.getValueType() == MVT::i32 ?
-                      AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
-    Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
-                              Operand);
-  } else {
-    SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
-    Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
-                              Operand.getValueType(), Operand, RC);
-  }
-  Operand = SDValue(Node, 0);
-}
-
 /// \returns true if \p Node's operands are different from the SDValue list
 /// \p Ops
 static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
@@ -1710,8 +1659,9 @@ static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
   return false;
 }
 
-/// \brief Try to commute instructions and insert copies in order to satisfy the
-/// operand constraints.
+/// TODO: This needs to be removed. Its current primary purpose is to fold
+/// immediates into operands when legal. The legalization parts are redundant
+/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
 SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
   // Original encoding (either e32 or e64)
@@ -1784,11 +1734,9 @@ SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
     // Is this a VSrc or SSrc operand?
     unsigned RegClass = Desc->OpInfo[Op].RegClass;
     if (isVSrc(RegClass) || isSSrc(RegClass)) {
-      // Try to fold the immediates
-      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
-        // Folding didn't work, make sure we don't hit the SReg limit.
-        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
-      }
+      // Try to fold the immediates. If this ends up with multiple constant bus
+      // uses, it will be legalized later.
+      foldImm(Ops[i], Immediate, ScalarSlotUsed);
       continue;
     }
 
@@ -1938,6 +1886,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
       getTargetMachine().getSubtargetImpl()->getInstrInfo());
 
+  TII->legalizeOperands(MI);
+
   if (TII->isMIMG(MI->getOpcode())) {
     unsigned VReg = MI->getOperand(0).getReg();
     unsigned Writemask = MI->getOperand(1).getImm();
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index f953b482fdd..9cf4dbcb2fd 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -47,8 +47,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
                                                 const SDValue &Op) const;
   bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                     unsigned RegClass) const;
-  void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                       unsigned RegClass, bool &ScalarSlotUsed) const;
 
   SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
   void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 8369a0c8687..c1fc4b3a974 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -42,6 +42,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
   let TSFlags{10} = MUBUF;
   let TSFlags{11} = MTBUF;
   let TSFlags{12} = FLAT;
+
+  // Most instructions require adjustments after selection to satisfy
+  // operand requirements.
+  let hasPostISelHook = 1;
 }
 
 class Enc32 {
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index fb5dad13845..ed8d9793af7 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -1394,20 +1394,39 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
 
     int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
 
-    // First we need to consider the instruction's operand requirements before
-    // legalizing. Some operands are required to be SGPRs, but we are still
-    // bound by the constant bus requirement to only use one.
-    //
-    // If the operand's class is an SGPR, we can never move it.
-    for (unsigned i = 0; i < 3; ++i) {
-      int Idx = VOP3Idx[i];
-      if (Idx == -1)
-        break;
+    for (const MachineOperand &MO : MI->implicit_operands()) {
+      // We only care about reads.
+      if (MO.isDef())
+        continue;
 
-      if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
-        SGPRReg = MI->getOperand(Idx).getReg();
+      if (MO.getReg() == AMDGPU::VCC) {
+        SGPRReg = AMDGPU::VCC;
         break;
       }
+
+      if (MO.getReg() == AMDGPU::FLAT_SCR) {
+        SGPRReg = AMDGPU::FLAT_SCR;
+        break;
+      }
+    }
+
+
+    if (SGPRReg == AMDGPU::NoRegister) {
+      // First we need to consider the instruction's operand requirements before
+      // legalizing. Some operands are required to be SGPRs, but we are still
+      // bound by the constant bus requirement to only use one.
+      //
+      // If the operand's class is an SGPR, we can never move it.
+      for (unsigned i = 0; i < 3; ++i) {
+        int Idx = VOP3Idx[i];
+        if (Idx == -1)
+          break;
+
+        if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
+          SGPRReg = MI->getOperand(Idx).getReg();
+          break;
+        }
+      }
     }
 
     for (unsigned i = 0; i < 3; ++i) {
diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll
index 61d95135a4a..f0b341b7a6b 100644
--- a/test/CodeGen/R600/fneg.f64.ll
+++ b/test/CodeGen/R600/fneg.f64.ll
@@ -50,7 +50,7 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
 ; SI-LABEL: @fneg_fold
 ; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-NOT: XOR
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
   %fsub = fsub double -0.0, %in
   %fmul = fmul double %fsub, %in
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 72cd15c7d70..8631301b49c 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -59,7 +59,7 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
 ; FUNC-LABEL: @fneg_fold
 ; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; SI-NOT: XOR
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
+; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
   %fsub = fsub float -0.0, %in
   %fmul = fmul float %fsub, %in
diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll
index 8c86fb5aeab..a9dee8ca78c 100644
--- a/test/CodeGen/R600/rotl.ll
+++ b/test/CodeGen/R600/rotl.ll
@@ -8,7 +8,7 @@
 
 ; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
 ; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
 define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 entry:
   %0 = shl i32 %x, %y
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index cc942c10a91..eb1176f5855 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
+; CHECK-LABEL: @main
+; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp oeq float %p, %p
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index 33007fc754b..a78e8e6b45b 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
+; CHECK-LABEL: @main
+; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp une float %p, %p