mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	R600/SI: Remove explicit m0 operand from DS instructions
Instead add m0 as an implicit operand. This helps avoid spills of the m0 register in some cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237141 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -78,6 +78,8 @@ private:
 | 
				
			|||||||
  bool isLocalLoad(const LoadSDNode *N) const;
 | 
					  bool isLocalLoad(const LoadSDNode *N) const;
 | 
				
			||||||
  bool isRegionLoad(const LoadSDNode *N) const;
 | 
					  bool isRegionLoad(const LoadSDNode *N) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SDNode *glueCopyToM0(SDNode *N) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
 | 
					  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
 | 
				
			||||||
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
 | 
					  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
 | 
				
			||||||
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
 | 
					  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
 | 
				
			||||||
@@ -242,6 +244,32 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
 | 
				
			|||||||
  return true;
 | 
					  return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
 | 
				
			||||||
 | 
					  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
 | 
				
			||||||
 | 
					      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
 | 
				
			||||||
 | 
					                 AMDGPUAS::LOCAL_ADDRESS))
 | 
				
			||||||
 | 
					    return N;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const SITargetLowering& Lowering =
 | 
				
			||||||
 | 
					      *static_cast<const SITargetLowering*>(getTargetLowering());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Write max value to m0 before each local memory operation (load, store or atomic)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
 | 
				
			||||||
 | 
					                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SDValue Glue = M0.getValue(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmallVector <SDValue, 8> Ops;
 | 
				
			||||||
 | 
					  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 | 
				
			||||||
 | 
					     Ops.push_back(N->getOperand(i));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  Ops.push_back(Glue);
 | 
				
			||||||
 | 
					  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return N;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
					SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
				
			||||||
  unsigned int Opc = N->getOpcode();
 | 
					  unsigned int Opc = N->getOpcode();
 | 
				
			||||||
  if (N->isMachineOpcode()) {
 | 
					  if (N->isMachineOpcode()) {
 | 
				
			||||||
@@ -249,6 +277,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
				
			|||||||
    return nullptr;   // Already selected.
 | 
					    return nullptr;   // Already selected.
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (isa<AtomicSDNode>(N))
 | 
				
			||||||
 | 
					    N = glueCopyToM0(N);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  switch (Opc) {
 | 
					  switch (Opc) {
 | 
				
			||||||
  default: break;
 | 
					  default: break;
 | 
				
			||||||
  // We are selecting i64 ADD here instead of custom lower it during
 | 
					  // We are selecting i64 ADD here instead of custom lower it during
 | 
				
			||||||
@@ -423,23 +454,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  case ISD::LOAD: {
 | 
					  case ISD::LOAD: {
 | 
				
			||||||
 | 
					    LoadSDNode *LD = cast<LoadSDNode>(N);
 | 
				
			||||||
 | 
					    SDLoc SL(N);
 | 
				
			||||||
 | 
					    EVT VT = N->getValueType(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
 | 
				
			||||||
 | 
					      N = glueCopyToM0(N);
 | 
				
			||||||
 | 
					      break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // To simplify the TableGen patterns, we replace all i64 loads with
 | 
					    // To simplify the TableGen patterns, we replace all i64 loads with
 | 
				
			||||||
    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
 | 
					    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
 | 
				
			||||||
    // during DAG legalization, however, some places (ExpandUnalignedLoad)
 | 
					    // during DAG legalization, however, some places (ExpandUnalignedLoad)
 | 
				
			||||||
    // in the DAG legalizer assume that if i64 is legal, so doing this
 | 
					    // in the DAG legalizer assume that if i64 is legal, so doing this
 | 
				
			||||||
    // promotion early can cause problems.
 | 
					    // promotion early can cause problems.
 | 
				
			||||||
    EVT VT = N->getValueType(0);
 | 
					 | 
				
			||||||
    LoadSDNode *LD = cast<LoadSDNode>(N);
 | 
					 | 
				
			||||||
    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
 | 
					 | 
				
			||||||
      break;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
 | 
					    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
 | 
				
			||||||
                                      LD->getBasePtr(), LD->getMemOperand());
 | 
					                                      LD->getBasePtr(), LD->getMemOperand());
 | 
				
			||||||
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
 | 
					    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
 | 
				
			||||||
                                      MVT::i64, NewLoad);
 | 
					                                      MVT::i64, NewLoad);
 | 
				
			||||||
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
 | 
					    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
 | 
				
			||||||
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
 | 
					    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
 | 
				
			||||||
    SelectCode(NewLoad.getNode());
 | 
					    SDNode *Load = glueCopyToM0(NewLoad.getNode());
 | 
				
			||||||
 | 
					    SelectCode(Load);
 | 
				
			||||||
    N = BitCast.getNode();
 | 
					    N = BitCast.getNode();
 | 
				
			||||||
    break;
 | 
					    break;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -448,8 +485,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
				
			|||||||
    // Handle i64 stores here for the same reason mentioned above for loads.
 | 
					    // Handle i64 stores here for the same reason mentioned above for loads.
 | 
				
			||||||
    StoreSDNode *ST = cast<StoreSDNode>(N);
 | 
					    StoreSDNode *ST = cast<StoreSDNode>(N);
 | 
				
			||||||
    SDValue Value = ST->getValue();
 | 
					    SDValue Value = ST->getValue();
 | 
				
			||||||
    if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
 | 
					    if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
 | 
				
			||||||
      break;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
 | 
					      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
 | 
				
			||||||
                                        MVT::v2i32, Value);
 | 
					                                        MVT::v2i32, Value);
 | 
				
			||||||
@@ -466,6 +502,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 | 
				
			|||||||
      // getNode() may fold the bitcast if its input was another bitcast.  If that
 | 
					      // getNode() may fold the bitcast if its input was another bitcast.  If that
 | 
				
			||||||
      // happens we should only select the new store.
 | 
					      // happens we should only select the new store.
 | 
				
			||||||
      N = NewStore.getNode();
 | 
					      N = NewStore.getNode();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    N = glueCopyToM0(N);
 | 
				
			||||||
    break;
 | 
					    break;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -183,12 +183,15 @@ def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			|||||||
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 | 
					    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 | 
				
			||||||
}]>;
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
 | 
					class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
 | 
				
			||||||
 | 
					                                              (ld_node node:$ptr), [{
 | 
				
			||||||
  LoadSDNode *L = cast<LoadSDNode>(N);
 | 
					  LoadSDNode *L = cast<LoadSDNode>(N);
 | 
				
			||||||
  return L->getExtensionType() == ISD::ZEXTLOAD ||
 | 
					  return L->getExtensionType() == ISD::ZEXTLOAD ||
 | 
				
			||||||
         L->getExtensionType() == ISD::EXTLOAD;
 | 
					         L->getExtensionType() == ISD::EXTLOAD;
 | 
				
			||||||
}]>;
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def az_extload : AZExtLoadBase <unindexedload>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
 | 
					def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
 | 
				
			||||||
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
 | 
					  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
 | 
				
			||||||
}]>;
 | 
					}]>;
 | 
				
			||||||
@@ -361,22 +364,26 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
 | 
				
			|||||||
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 | 
					  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 | 
				
			||||||
}]>;
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def atomic_cmp_swap_32_local :
 | 
					  def _32_local : PatFrag <
 | 
				
			||||||
  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
 | 
					    (ops node:$ptr, node:$cmp, node:$swap),
 | 
				
			||||||
          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
 | 
					    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
 | 
				
			||||||
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
 | 
					      AtomicSDNode *AN = cast<AtomicSDNode>(N);
 | 
				
			||||||
      return AN->getMemoryVT() == MVT::i32 &&
 | 
					      return AN->getMemoryVT() == MVT::i32 &&
 | 
				
			||||||
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 | 
					             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 | 
				
			||||||
}]>;
 | 
					  }]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def atomic_cmp_swap_64_local :
 | 
					  def _64_local : PatFrag<
 | 
				
			||||||
  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
 | 
					    (ops node:$ptr, node:$cmp, node:$swap),
 | 
				
			||||||
          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
 | 
					    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
 | 
				
			||||||
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
 | 
					      AtomicSDNode *AN = cast<AtomicSDNode>(N);
 | 
				
			||||||
      return AN->getMemoryVT() == MVT::i64 &&
 | 
					      return AN->getMemoryVT() == MVT::i64 &&
 | 
				
			||||||
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 | 
					             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 | 
				
			||||||
}]>;
 | 
					  }]>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
					def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			||||||
    return isFlatLoad(dyn_cast<LoadSDNode>(N));
 | 
					    return isFlatLoad(dyn_cast<LoadSDNode>(N));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -604,7 +604,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
 | 
				
			|||||||
  let LGKM_CNT = 1;
 | 
					  let LGKM_CNT = 1;
 | 
				
			||||||
  let DS = 1;
 | 
					  let DS = 1;
 | 
				
			||||||
  let UseNamedOperandTable = 1;
 | 
					  let UseNamedOperandTable = 1;
 | 
				
			||||||
  let DisableEncoding = "$m0";
 | 
					  let Uses = [M0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Most instructions load and store data, so set this as the default.
 | 
					  // Most instructions load and store data, so set this as the default.
 | 
				
			||||||
  let mayLoad = 1;
 | 
					  let mayLoad = 1;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -124,6 +124,107 @@ def SIconstdata_ptr : SDNode<
 | 
				
			|||||||
  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
 | 
					  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
 | 
				
			||||||
 | 
					// to be glued to the memory instructions.
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
 | 
				
			||||||
 | 
					  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
 | 
				
			||||||
 | 
					  return isLocalLoad(cast<LoadSDNode>(N));
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
 | 
				
			||||||
 | 
					         cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_load_local_align8 : Aligned8Bytes <
 | 
				
			||||||
 | 
					  (ops node:$ptr), (si_load_local node:$ptr)
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def si_az_extload_local : AZExtLoadBase <si_ld_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					multiclass SIExtLoadLocal <PatFrag ld_node> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
 | 
				
			||||||
 | 
					                     [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
 | 
				
			||||||
 | 
					  >;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
 | 
				
			||||||
 | 
					                     [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
 | 
				
			||||||
 | 
					  >;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
 | 
				
			||||||
 | 
					defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def SIst_local : SDNode <"ISD::STORE", SDTStore,
 | 
				
			||||||
 | 
					  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_st_local : PatFrag <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return isLocalStore(cast<StoreSDNode>(N));
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_store_local : PatFrag <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
 | 
				
			||||||
 | 
					         !cast<StoreSDNode>(N)->isTruncatingStore();
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_store_local_align8 : Aligned8Bytes <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_truncstore_local : PatFrag <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->isTruncatingStore();
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_truncstore_local_i8 : PatFrag <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_truncstore_local_i16 : PatFrag <
 | 
				
			||||||
 | 
					  (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					multiclass SIAtomicM0Glue2 <string op_name> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
 | 
				
			||||||
 | 
					    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
 | 
				
			||||||
 | 
					  >;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _local : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
 | 
				
			||||||
 | 
					defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
 | 
				
			||||||
 | 
					defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
 | 
				
			||||||
 | 
					defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
 | 
				
			||||||
 | 
					defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
 | 
				
			||||||
 | 
					defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
 | 
				
			||||||
 | 
					defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
 | 
				
			||||||
 | 
					defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
 | 
				
			||||||
 | 
					defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
 | 
				
			||||||
 | 
					defm si_atomic_swap : SIAtomicM0Glue2 <"SWAP">;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
 | 
				
			||||||
 | 
					  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Transformation function, extract the lower 32 bits of a 64-bit immediate
 | 
					// Transformation function, extract the lower 32 bits of a 64-bit immediate
 | 
				
			||||||
def LO32 : SDNodeXForm<imm, [{
 | 
					def LO32 : SDNodeXForm<imm, [{
 | 
				
			||||||
  return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
 | 
					  return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
 | 
				
			||||||
@@ -1726,7 +1827,7 @@ class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
  dag outs = (outs rc:$vdst),
 | 
					  dag outs = (outs rc:$vdst),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds, M0Reg:$m0),
 | 
					  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
 | 
				
			||||||
  string asm = opName#" $vdst, $addr"#"$offset$gds"> {
 | 
					  string asm = opName#" $vdst, $addr"#"$offset$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
				
			||||||
@@ -1740,7 +1841,7 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			|||||||
multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
  dag outs = (outs rc:$vdst),
 | 
					  dag outs = (outs rc:$vdst),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
 | 
					  dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
 | 
				
			||||||
                 gds01:$gds, M0Reg:$m0),
 | 
					                 gds01:$gds),
 | 
				
			||||||
  string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
 | 
					  string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
				
			||||||
@@ -1753,8 +1854,7 @@ multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
  dag outs = (outs),
 | 
					  dag outs = (outs),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
 | 
					  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
 | 
				
			||||||
                 M0Reg:$m0),
 | 
					 | 
				
			||||||
  string asm = opName#" $addr, $data0"#"$offset$gds"> {
 | 
					  string asm = opName#" $addr, $data0"#"$offset$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
				
			||||||
@@ -1769,7 +1869,7 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			|||||||
multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
  dag outs = (outs),
 | 
					  dag outs = (outs),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
 | 
					  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
 | 
				
			||||||
              ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
 | 
					              ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
 | 
				
			||||||
  string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
 | 
					  string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
				
			||||||
@@ -1783,8 +1883,7 @@ multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			|||||||
multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
                        string noRetOp = "",
 | 
					                        string noRetOp = "",
 | 
				
			||||||
  dag outs = (outs rc:$vdst),
 | 
					  dag outs = (outs rc:$vdst),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
 | 
					  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
 | 
				
			||||||
                 M0Reg:$m0),
 | 
					 | 
				
			||||||
  string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
 | 
					  string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
				
			||||||
@@ -1812,14 +1911,14 @@ multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
 | 
				
			|||||||
                        string noRetOp = "", RegisterClass src = rc> :
 | 
					                        string noRetOp = "", RegisterClass src = rc> :
 | 
				
			||||||
  DS_1A2D_RET_m <op, asm, rc, noRetOp,
 | 
					  DS_1A2D_RET_m <op, asm, rc, noRetOp,
 | 
				
			||||||
                 (ins VGPR_32:$addr, src:$data0, src:$data1,
 | 
					                 (ins VGPR_32:$addr, src:$data0, src:$data1,
 | 
				
			||||||
                      ds_offset:$offset, gds:$gds, M0Reg:$m0)
 | 
					                      ds_offset:$offset, gds:$gds)
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
					multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			||||||
                          string noRetOp = opName,
 | 
					                          string noRetOp = opName,
 | 
				
			||||||
  dag outs = (outs),
 | 
					  dag outs = (outs),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
 | 
					  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
 | 
				
			||||||
                 ds_offset:$offset, gds:$gds, M0Reg:$m0),
 | 
					                 ds_offset:$offset, gds:$gds),
 | 
				
			||||||
  string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
 | 
					  string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>,
 | 
				
			||||||
@@ -1833,7 +1932,7 @@ multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_0A_RET <bits<8> op, string opName,
 | 
					multiclass DS_0A_RET <bits<8> op, string opName,
 | 
				
			||||||
  dag outs = (outs VGPR_32:$vdst),
 | 
					  dag outs = (outs VGPR_32:$vdst),
 | 
				
			||||||
  dag ins = (ins ds_offset:$offset, gds:$gds, M0Reg:$m0),
 | 
					  dag ins = (ins ds_offset:$offset, gds:$gds),
 | 
				
			||||||
  string asm = opName#" $vdst"#"$offset"#"$gds"> {
 | 
					  string asm = opName#" $vdst"#"$offset"#"$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  let mayLoad = 1, mayStore = 1 in {
 | 
					  let mayLoad = 1, mayStore = 1 in {
 | 
				
			||||||
@@ -1848,7 +1947,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A_RET_GDS <bits<8> op, string opName,
 | 
					multiclass DS_1A_RET_GDS <bits<8> op, string opName,
 | 
				
			||||||
  dag outs = (outs VGPR_32:$vdst),
 | 
					  dag outs = (outs VGPR_32:$vdst),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
 | 
					  dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
 | 
				
			||||||
  string asm = opName#" $vdst, $addr"#"$offset gds"> {
 | 
					  string asm = opName#" $vdst, $addr"#"$offset gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
				
			||||||
@@ -1861,7 +1960,7 @@ multiclass DS_1A_RET_GDS <bits<8> op, string opName,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A_GDS <bits<8> op, string opName,
 | 
					multiclass DS_1A_GDS <bits<8> op, string opName,
 | 
				
			||||||
  dag outs = (outs),
 | 
					  dag outs = (outs),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, M0Reg:$m0),
 | 
					  dag ins = (ins VGPR_32:$addr),
 | 
				
			||||||
  string asm = opName#" $addr gds"> {
 | 
					  string asm = opName#" $addr gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
					  def "" : DS_Pseudo <opName, outs, ins, []>;
 | 
				
			||||||
@@ -1874,7 +1973,7 @@ multiclass DS_1A_GDS <bits<8> op, string opName,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
multiclass DS_1A <bits<8> op, string opName,
 | 
					multiclass DS_1A <bits<8> op, string opName,
 | 
				
			||||||
  dag outs = (outs),
 | 
					  dag outs = (outs),
 | 
				
			||||||
  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0, gds:$gds),
 | 
					  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
 | 
				
			||||||
  string asm = opName#" $addr"#"$offset"#"$gds"> {
 | 
					  string asm = opName#" $addr"#"$offset"#"$gds"> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  let mayLoad = 1, mayStore = 1 in {
 | 
					  let mayLoad = 1, mayStore = 1 in {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2824,52 +2824,52 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
					class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
				
			||||||
  (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
 | 
					  (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
 | 
				
			||||||
  (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
 | 
					  (inst $ptr, (as_i16imm $offset), (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSReadPat <DS_READ_I8,  i32, sextloadi8_local>;
 | 
					def : DSReadPat <DS_READ_I8,  i32, si_sextload_local_i8>;
 | 
				
			||||||
def : DSReadPat <DS_READ_U8,  i32, az_extloadi8_local>;
 | 
					def : DSReadPat <DS_READ_U8,  i32, si_az_extload_local_i8>;
 | 
				
			||||||
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
 | 
					def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
 | 
				
			||||||
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
 | 
					def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
 | 
				
			||||||
def : DSReadPat <DS_READ_B32, i32, local_load>;
 | 
					def : DSReadPat <DS_READ_B32, i32, si_load_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let AddedComplexity = 100 in {
 | 
					let AddedComplexity = 100 in {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
 | 
					def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // End AddedComplexity = 100
 | 
					} // End AddedComplexity = 100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat <
 | 
					def : Pat <
 | 
				
			||||||
  (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
 | 
					  (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
 | 
				
			||||||
                                                    i8:$offset1))),
 | 
					                                                    i8:$offset1))),
 | 
				
			||||||
  (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1))
 | 
					  (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
					class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
				
			||||||
  (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
 | 
					  (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
 | 
				
			||||||
  (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
 | 
					  (inst $ptr, $value, (as_i16imm $offset), (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
 | 
					def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
 | 
				
			||||||
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
 | 
					def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
 | 
				
			||||||
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
 | 
					def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let AddedComplexity = 100 in {
 | 
					let AddedComplexity = 100 in {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
 | 
					def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
 | 
				
			||||||
} // End AddedComplexity = 100
 | 
					} // End AddedComplexity = 100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat <
 | 
					def : Pat <
 | 
				
			||||||
  (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
 | 
					  (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
 | 
				
			||||||
                                                               i8:$offset1)),
 | 
					                                                               i8:$offset1)),
 | 
				
			||||||
  (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
 | 
					  (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
 | 
				
			||||||
                       (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
 | 
					                       (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
 | 
				
			||||||
                       (i1 0), (S_MOV_B32 -1))
 | 
					                       (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
					class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
				
			||||||
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
 | 
					  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
 | 
				
			||||||
  (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
 | 
					  (inst $ptr, $value, (as_i16imm $offset), (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
 | 
					// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
 | 
				
			||||||
@@ -2885,53 +2885,53 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
				
			|||||||
class DSAtomicIncRetPat<DS inst, ValueType vt,
 | 
					class DSAtomicIncRetPat<DS inst, ValueType vt,
 | 
				
			||||||
                        Instruction LoadImm, PatFrag frag> : Pat <
 | 
					                        Instruction LoadImm, PatFrag frag> : Pat <
 | 
				
			||||||
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
 | 
					  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
 | 
				
			||||||
  (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
 | 
					  (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
					class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
 | 
				
			||||||
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
 | 
					  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
 | 
				
			||||||
  (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
 | 
					  (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// 32-bit atomics.
 | 
					// 32-bit atomics.
 | 
				
			||||||
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
 | 
					def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
 | 
				
			||||||
                        S_MOV_B32, atomic_load_add_local>;
 | 
					                        S_MOV_B32, si_atomic_load_add_local>;
 | 
				
			||||||
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
 | 
					def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
 | 
				
			||||||
                        S_MOV_B32, atomic_load_sub_local>;
 | 
					                        S_MOV_B32, si_atomic_load_sub_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
 | 
					def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
 | 
					def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
 | 
					def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
 | 
					def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
 | 
					def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
 | 
					def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
 | 
					def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
 | 
					def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
 | 
					def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
 | 
					def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
 | 
					def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// 64-bit atomics.
 | 
					// 64-bit atomics.
 | 
				
			||||||
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
 | 
					def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
 | 
				
			||||||
                        S_MOV_B64, atomic_load_add_local>;
 | 
					                        S_MOV_B64, si_atomic_load_add_local>;
 | 
				
			||||||
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
 | 
					def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
 | 
				
			||||||
                        S_MOV_B64, atomic_load_sub_local>;
 | 
					                        S_MOV_B64, si_atomic_load_sub_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
 | 
					def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
 | 
					def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
 | 
					def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
 | 
					def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
 | 
					def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
 | 
					def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
 | 
					def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
 | 
					def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
 | 
					def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
 | 
				
			||||||
def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
 | 
					def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
 | 
					def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -213,7 +213,6 @@ MachineBasicBlock::iterator  SILoadStoreOptimizer::mergeRead2Pair(
 | 
				
			|||||||
  // Be careful, since the addresses could be subregisters themselves in weird
 | 
					  // Be careful, since the addresses could be subregisters themselves in weird
 | 
				
			||||||
  // cases, like vectors of pointers.
 | 
					  // cases, like vectors of pointers.
 | 
				
			||||||
  const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
 | 
					  const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
 | 
				
			||||||
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
 | 
					  unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
 | 
				
			||||||
  unsigned DestReg1
 | 
					  unsigned DestReg1
 | 
				
			||||||
@@ -254,37 +253,24 @@ MachineBasicBlock::iterator  SILoadStoreOptimizer::mergeRead2Pair(
 | 
				
			|||||||
    .addImm(NewOffset0) // offset0
 | 
					    .addImm(NewOffset0) // offset0
 | 
				
			||||||
    .addImm(NewOffset1) // offset1
 | 
					    .addImm(NewOffset1) // offset1
 | 
				
			||||||
    .addImm(0) // gds
 | 
					    .addImm(0) // gds
 | 
				
			||||||
    .addOperand(*M0Reg) // M0
 | 
					 | 
				
			||||||
    .addMemOperand(*I->memoperands_begin())
 | 
					    .addMemOperand(*I->memoperands_begin())
 | 
				
			||||||
    .addMemOperand(*Paired->memoperands_begin());
 | 
					    .addMemOperand(*Paired->memoperands_begin());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LIS->InsertMachineInstrInMaps(Read2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
 | 
					  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
 | 
				
			||||||
  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
 | 
					  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
 | 
				
			||||||
  updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
 | 
					  updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
 | 
				
			||||||
  updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
 | 
					  updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LIS->RemoveMachineInstrFromMaps(I);
 | 
					  LIS->RemoveMachineInstrFromMaps(I);
 | 
				
			||||||
  LIS->RemoveMachineInstrFromMaps(Paired);
 | 
					  // Replacing Paired in the maps with Read2 allows us to avoid updating the
 | 
				
			||||||
 | 
					  // live range for the m0 register.
 | 
				
			||||||
 | 
					  LIS->ReplaceMachineInstrInMaps(Paired, Read2);
 | 
				
			||||||
  I->eraseFromParent();
 | 
					  I->eraseFromParent();
 | 
				
			||||||
  Paired->eraseFromParent();
 | 
					  Paired->eraseFromParent();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
 | 
					  LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
 | 
				
			||||||
  LIS->shrinkToUses(&AddrRegLI);
 | 
					  LIS->shrinkToUses(&AddrRegLI);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
 | 
					 | 
				
			||||||
  LIS->shrinkToUses(&M0RegLI);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Currently m0 is treated as a register class with one member instead of an
 | 
					 | 
				
			||||||
  // implicit physical register. We are using the virtual register for the first
 | 
					 | 
				
			||||||
  // one, but we still need to update the live range of the now unused second m0
 | 
					 | 
				
			||||||
  // virtual register to avoid verifier errors.
 | 
					 | 
				
			||||||
  const MachineOperand *PairedM0Reg
 | 
					 | 
				
			||||||
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
 | 
					 | 
				
			||||||
  LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
 | 
					 | 
				
			||||||
  LIS->shrinkToUses(&PairedM0RegLI);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  LIS->getInterval(DestReg); // Create new LI
 | 
					  LIS->getInterval(DestReg); // Create new LI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
 | 
					  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
 | 
				
			||||||
@@ -300,7 +286,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
 | 
				
			|||||||
  // Be sure to use .addOperand(), and not .addReg() with these. We want to be
 | 
					  // Be sure to use .addOperand(), and not .addReg() with these. We want to be
 | 
				
			||||||
  // sure we preserve the subregister index and any register flags set on them.
 | 
					  // sure we preserve the subregister index and any register flags set on them.
 | 
				
			||||||
  const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
 | 
					  const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
 | 
				
			||||||
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
 | 
					 | 
				
			||||||
  const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
 | 
					  const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
 | 
				
			||||||
  const MachineOperand *Data1
 | 
					  const MachineOperand *Data1
 | 
				
			||||||
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
 | 
					    = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
 | 
				
			||||||
@@ -331,6 +316,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
 | 
				
			|||||||
  const MCInstrDesc &Write2Desc = TII->get(Opc);
 | 
					  const MCInstrDesc &Write2Desc = TII->get(Opc);
 | 
				
			||||||
  DebugLoc DL = I->getDebugLoc();
 | 
					  DebugLoc DL = I->getDebugLoc();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // repairIntervalsInRange() doesn't handle physical registers, so we have
 | 
				
			||||||
 | 
					  // to update the M0 range manually.
 | 
				
			||||||
 | 
					  SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
 | 
				
			||||||
 | 
					  LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
 | 
				
			||||||
 | 
					  LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
 | 
				
			||||||
 | 
					  bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  MachineInstrBuilder Write2
 | 
					  MachineInstrBuilder Write2
 | 
				
			||||||
    = BuildMI(*MBB, I, DL, Write2Desc)
 | 
					    = BuildMI(*MBB, I, DL, Write2Desc)
 | 
				
			||||||
    .addOperand(*Addr) // addr
 | 
					    .addOperand(*Addr) // addr
 | 
				
			||||||
@@ -339,21 +331,25 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
 | 
				
			|||||||
    .addImm(NewOffset0) // offset0
 | 
					    .addImm(NewOffset0) // offset0
 | 
				
			||||||
    .addImm(NewOffset1) // offset1
 | 
					    .addImm(NewOffset1) // offset1
 | 
				
			||||||
    .addImm(0) // gds
 | 
					    .addImm(0) // gds
 | 
				
			||||||
    .addOperand(*M0Reg)  // m0
 | 
					 | 
				
			||||||
    .addMemOperand(*I->memoperands_begin())
 | 
					    .addMemOperand(*I->memoperands_begin())
 | 
				
			||||||
    .addMemOperand(*Paired->memoperands_begin());
 | 
					    .addMemOperand(*Paired->memoperands_begin());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // XXX - How do we express subregisters here?
 | 
					  // XXX - How do we express subregisters here?
 | 
				
			||||||
  unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
 | 
					  unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
 | 
				
			||||||
                          M0Reg->getReg()};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LIS->RemoveMachineInstrFromMaps(I);
 | 
					  LIS->RemoveMachineInstrFromMaps(I);
 | 
				
			||||||
  LIS->RemoveMachineInstrFromMaps(Paired);
 | 
					  LIS->RemoveMachineInstrFromMaps(Paired);
 | 
				
			||||||
  I->eraseFromParent();
 | 
					  I->eraseFromParent();
 | 
				
			||||||
  Paired->eraseFromParent();
 | 
					  Paired->eraseFromParent();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // This doesn't handle physical registers like M0
 | 
				
			||||||
  LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
 | 
					  LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (UpdateM0Range) {
 | 
				
			||||||
 | 
					    SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
 | 
				
			||||||
 | 
					    M0Segment->end = Write2Index.getRegSlot();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
 | 
					  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
 | 
				
			||||||
  return Write2.getInstr();
 | 
					  return Write2.getInstr();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
; SI-LABEL: @simple_read2st64_f32_over_max_offset
 | 
					; SI-LABEL: @simple_read2st64_f32_over_max_offset
 | 
				
			||||||
; SI-NOT: ds_read2st64_b32
 | 
					; SI-NOT: ds_read2st64_b32
 | 
				
			||||||
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
 | 
					 | 
				
			||||||
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
 | 
					; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
 | 
				
			||||||
 | 
					; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
 | 
				
			||||||
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 | 
					; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 | 
				
			||||||
; SI: s_endpgm
 | 
					; SI: s_endpgm
 | 
				
			||||||
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
 | 
					define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -69,8 +69,8 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
 | 
				
			|||||||
; pointer can be used with an offset into the second one.
 | 
					; pointer can be used with an offset into the second one.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-LABEL: {{^}}load_shl_base_lds_2:
 | 
					; SI-LABEL: {{^}}load_shl_base_lds_2:
 | 
				
			||||||
 | 
					; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 | 
				
			||||||
; SI: s_mov_b32 m0, -1
 | 
					; SI: s_mov_b32 m0, -1
 | 
				
			||||||
; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 | 
					 | 
				
			||||||
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
 | 
					; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
 | 
				
			||||||
; SI: s_endpgm
 | 
					; SI: s_endpgm
 | 
				
			||||||
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
 | 
					define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user