Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-11-04 05:17:07 +00:00
	R600/SI: Clean up some of the unused REGISTER_{LOAD,STORE} code
There are a few more cleanups to do, but I ran into some problems with ext loads and trunc stores when I tried to change some of the vector loads and stores from custom to legal, so I wasn't able to get rid of everything.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213552 91177308-0d34-0410-b5e6-96231b3b80d8
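
For context on "custom to legal": marking an operation Custom with setOperationAction routes matching nodes through the target's LowerOperation hook during legalization, while Legal (the default) leaves them alone; returning a null SDValue from the hook also means "actually legal", which is what the deleted v2i32 early-return in the hunks below relied on. A minimal toy model of that dispatch, not LLVM code (names and values here are illustrative only):

    // Toy model (not LLVM code) of the operation-action table that
    // setOperationAction() fills in; names and values are illustrative.
    #include <cstdio>
    #include <map>
    #include <utility>

    enum class Action { Legal, Custom };

    static std::map<std::pair<int, int>, Action> OpActions; // (opcode, type) -> action

    static void setOperationAction(int Op, int VT, Action A) { OpActions[{Op, VT}] = A; }

    static Action getOperationAction(int Op, int VT) {
      auto It = OpActions.find({Op, VT});
      return It == OpActions.end() ? Action::Legal : It->second; // default: Legal
    }

    int main() {
      const int ISD_LOAD = 0, MVT_v2i32 = 1;
      // Before the patch: v2i32 loads were marked Custom, and the hook then
      // returned a null SDValue for them, i.e. "actually legal". After the
      // patch the entry is never inserted, so the hook is never consulted.
      setOperationAction(ISD_LOAD, MVT_v2i32, Action::Custom);
      std::printf("action = %d\n", static_cast<int>(getOperationAction(ISD_LOAD, MVT_v2i32)));
    }
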
@@ -90,7 +90,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FCOS, MVT::f32, Custom);
 
   // We need to custom lower vector stores from local memory
-  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
@@ -98,12 +97,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
-  // We need to custom lower loads/stores from private memory
-  setOperationAction(ISD::LOAD, MVT::i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
-
  setOperationAction(ISD::STORE, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
@@ -265,15 +258,6 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
   // see what for specifically. The wording everywhere else seems to be the
   // same.
 
-  // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
-  // no alignment restrictions.
-  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
-    // Using any pair of GPRs should be the same as any other pair.
-    if (IsFast)
-      *IsFast = true;
-    return VT.bitsGE(MVT::i64);
-  }
-
   // XXX - The only mention I see of this in the ISA manual is for LDS direct
   // reads the "byte address and must be dword aligned". Is it also true for the
   // normal loads and stores?
@@ -282,6 +266,7 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
 
   // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
   // byte-address are ignored, thus forcing Dword alignment.
+  // This applies to private, global, and constant memory.
   if (IsFast)
     *IsFast = true;
   return VT.bitsGT(MVT::i32);
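
With the private-address early-out deleted, the surviving logic above boils down to: dword alignment is what the hardware cares about, and anything strictly wider than 32 bits is supported (and fast) unaligned. A self-contained sketch of that decision, with the EVT/MVT machinery replaced by a plain bit width (an assumed simplification, not the real hook signature):

    #include <cstdio>

    // Mirrors "if (IsFast) *IsFast = true; return VT.bitsGT(MVT::i32);" from
    // the hunk above: since the two LSBs of the byte address are ignored for
    // dword-or-larger accesses, anything wider than 32 bits is fine unaligned.
    static bool allowsUnalignedAccess(unsigned SizeInBits, bool *IsFast) {
      if (IsFast)
        *IsFast = true;
      return SizeInBits > 32;
    }

    int main() {
      bool Fast = false;
      std::printf("i64: %d\n", allowsUnalignedAccess(64, &Fast)); // 1: allowed
      std::printf("i32: %d\n", allowsUnalignedAccess(32, &Fast)); // 0: not allowed
    }
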
@@ -658,28 +643,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::LOAD: {
-    LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
-    EVT VT = Op.getValueType();
-
-    // These loads are legal.
-    if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-        VT.isVector() && VT.getVectorNumElements() == 2 &&
-        VT.getVectorElementType() == MVT::i32)
-      return SDValue();
-
-    if (Op.getValueType().isVector() &&
-        (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
-         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
-         (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
-          Op.getValueType().getVectorNumElements() > 4))) {
-      return SplitVectorLoad(Op, DAG);
-    } else {
-      SDValue Result = LowerLOAD(Op, DAG);
-      assert((!Result.getNode() ||
-              Result.getNode()->getNumValues() == 2) &&
-             "Load should return a value and a chain");
-      return Result;
-    }
+    SDValue Result = LowerLOAD(Op, DAG);
+    assert((!Result.getNode() ||
+            Result.getNode()->getNumValues() == 2) &&
+           "Load should return a value and a chain");
+    return Result;
   }
 
   case ISD::FSIN:
@@ -940,58 +908,26 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
-  // Vector private memory loads have already been split, and
-  // all the rest of private memory loads are legal.
-  if (Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-    return SDValue();
-  }
-  SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
-  if (Lowered.getNode())
-    return Lowered;
 
-  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
-    return SDValue();
+  if (Op.getValueType().isVector()) {
+    assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
+           "Custom lowering for non-i32 vectors hasn't been implemented.");
+    unsigned NumElements = Op.getValueType().getVectorNumElements();
+    assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+    switch (Load->getAddressSpace()) {
+      default: break;
+      case AMDGPUAS::GLOBAL_ADDRESS:
+      case AMDGPUAS::PRIVATE_ADDRESS:
+        // v4 loads are supported for private and global memory.
+        if (NumElements <= 4)
+          break;
+        // fall-through
+      case AMDGPUAS::LOCAL_ADDRESS:
+        return SplitVectorLoad(Op, DAG);
+    }
   }
 
-  EVT MemVT = Load->getMemoryVT();
-
-  assert(!MemVT.isVector() && "Private loads should be scalarized");
-  assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
-
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
-                            DAG.getConstant(2, MVT::i32));
-
-  // FIXME: REGISTER_LOAD should probably have a chain result.
-  SDValue Chain = Load->getChain();
-  SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                               Chain, Ptr,
-                               DAG.getTargetConstant(0, MVT::i32),
-                               Op.getOperand(2));
-
-  SDValue Ret = LoLoad.getValue(0);
-  if (MemVT.getSizeInBits() == 64) {
-    // TODO: This needs a test to make sure the right thing is happening with
-    // the chain. That is hard without general function support.
-
-    SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
-                                 DAG.getConstant(1, MVT::i32));
-
-    SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                                 Chain, IncPtr,
-                                 DAG.getTargetConstant(0, MVT::i32),
-                                 Op.getOperand(2));
-
-    Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
-    // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
-    //                     LoLoad.getValue(1), HiLoad.getValue(1));
-  }
-
-  SDValue Ops[] = {
-    Ret,
-    Chain
-  };
-
-  return DAG.getMergeValues(Ops, DL);
+  return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
 }
 
 SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
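
The rewritten LowerLOAD above funnels every vector load through one decision: hand it to SplitVectorLoad, or fall through to the generic AMDGPU lowering. A self-contained model of that decision, built directly from the new switch (the enum is illustrative; the real code uses the AMDGPUAS constants and works on SDValues):

    #include <cstdio>

    enum AddressSpace { GLOBAL, PRIVATE, LOCAL, OTHER }; // illustrative, not AMDGPUAS values

    // true  -> hand off to SplitVectorLoad(Op, DAG)
    // false -> fall through to AMDGPUTargetLowering::LowerLOAD
    static bool mustSplitVectorLoad(AddressSpace AS, unsigned NumElements) {
      if (NumElements <= 2) // v2 loads are supported for all address spaces
        return false;
      switch (AS) {
      case GLOBAL:
      case PRIVATE:
        return NumElements > 4; // v4 loads are supported for private and global
      case LOCAL:
        return true; // local-memory vector loads wider than v2 are always split
      default:
        return false;
      }
    }

    int main() {
      std::printf("v8 global : %d\n", mustSplitVectorLoad(GLOBAL, 8));  // 1: split
      std::printf("v4 private: %d\n", mustSplitVectorLoad(PRIVATE, 4)); // 0: generic
      std::printf("v4 local  : %d\n", mustSplitVectorLoad(LOCAL, 4));   // 1: split
    }
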
                        DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
 | 
					                        DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
 | 
				
			||||||
                        Store->getBasePtr(), MVT::i1, Store->getMemOperand());
 | 
					                        Store->getBasePtr(), MVT::i1, Store->getMemOperand());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
 | 
					  return SDValue();
 | 
				
			||||||
    return SDValue();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
 | 
					 | 
				
			||||||
                            DAG.getConstant(2, MVT::i32));
 | 
					 | 
				
			||||||
  SDValue Chain = Store->getChain();
 | 
					 | 
				
			||||||
  SmallVector<SDValue, 8> Values;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  if (Store->isTruncatingStore()) {
 | 
					 | 
				
			||||||
    unsigned Mask = 0;
 | 
					 | 
				
			||||||
    if (Store->getMemoryVT() == MVT::i8) {
 | 
					 | 
				
			||||||
      Mask = 0xff;
 | 
					 | 
				
			||||||
    } else if (Store->getMemoryVT() == MVT::i16) {
 | 
					 | 
				
			||||||
      Mask = 0xffff;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
 | 
					 | 
				
			||||||
                              Chain, Store->getBasePtr(),
 | 
					 | 
				
			||||||
                              DAG.getConstant(0, MVT::i32));
 | 
					 | 
				
			||||||
    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
 | 
					 | 
				
			||||||
                                  DAG.getConstant(0x3, MVT::i32));
 | 
					 | 
				
			||||||
    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
 | 
					 | 
				
			||||||
                                   DAG.getConstant(3, MVT::i32));
 | 
					 | 
				
			||||||
    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
 | 
					 | 
				
			||||||
                                      DAG.getConstant(Mask, MVT::i32));
 | 
					 | 
				
			||||||
    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
 | 
					 | 
				
			||||||
                                       MaskedValue, ShiftAmt);
 | 
					 | 
				
			||||||
    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
 | 
					 | 
				
			||||||
                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
 | 
					 | 
				
			||||||
    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
 | 
					 | 
				
			||||||
                                  DAG.getConstant(Mask, MVT::i32),
 | 
					 | 
				
			||||||
                                  RotrAmt);
 | 
					 | 
				
			||||||
    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
 | 
					 | 
				
			||||||
    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Values.push_back(Dst);
 | 
					 | 
				
			||||||
  } else if (VT == MVT::i64) {
 | 
					 | 
				
			||||||
    for (unsigned i = 0; i < 2; ++i) {
 | 
					 | 
				
			||||||
      Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
 | 
					 | 
				
			||||||
                       Store->getValue(), DAG.getConstant(i, MVT::i32)));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else if (VT == MVT::i128) {
 | 
					 | 
				
			||||||
    for (unsigned i = 0; i < 2; ++i) {
 | 
					 | 
				
			||||||
      for (unsigned j = 0; j < 2; ++j) {
 | 
					 | 
				
			||||||
        Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
 | 
					 | 
				
			||||||
                           DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
 | 
					 | 
				
			||||||
                           Store->getValue(), DAG.getConstant(i, MVT::i32)),
 | 
					 | 
				
			||||||
                         DAG.getConstant(j, MVT::i32)));
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    Values.push_back(Store->getValue());
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for (unsigned i = 0; i < Values.size(); ++i) {
 | 
					 | 
				
			||||||
    SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
 | 
					 | 
				
			||||||
                                  Ptr, DAG.getConstant(i, MVT::i32));
 | 
					 | 
				
			||||||
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
 | 
					 | 
				
			||||||
                        Chain, Values[i], PartPtr,
 | 
					 | 
				
			||||||
                        DAG.getTargetConstant(0, MVT::i32));
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return Chain;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
 | 
					SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
 | 
				
			||||||
 
 | 
				
			|||||||
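
The bulk of that deletion is the old REGISTER_STORE-based emulation of truncating stores to private memory: a byte or short store became a read-modify-write on the containing 32-bit register. In plain C++ the idea was roughly as follows (a simplified reconstruction of the removed code; it builds the keep-mask with a complement where the original used a ROTR node):

    #include <cstdint>
    #include <cstdio>

    // Store one byte into a dword that lives in a 32-bit register: clear the
    // target byte lane, then OR in the shifted new value.
    static uint32_t storeByteIntoDword(uint32_t Dst, uint32_t Value, uint32_t ByteAddr) {
      uint32_t Mask = 0xff;                   // 0xffff for an i16 truncating store
      uint32_t ByteIdx = ByteAddr & 0x3;      // which byte inside the dword
      uint32_t ShiftAmt = ByteIdx << 3;       // byte index -> bit offset
      uint32_t Shifted = (Value & Mask) << ShiftAmt;
      uint32_t KeepMask = ~(Mask << ShiftAmt);
      return (Dst & KeepMask) | Shifted;
    }

    int main() {
      // Store byte 0xAB at byte offset 2 of the dword 0x11223344.
      std::printf("0x%08x\n", (unsigned)storeByteIntoDword(0x11223344u, 0xABu, 2)); // 0x11ab3344
    }
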
@@ -43,7 +43,11 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
 }
 
 ; SI-LABEL: @load_v4i8_to_v4f32:
-; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
+; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
+; BUFFER_LOAD_DWORD requires dword alignment.
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
 ; SI-NOT: BFE
 ; SI-NOT: LSHR
 ; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
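
The updated checks encode the alignment consequence of the lowering change: a byte-aligned 32-bit load can no longer be a single BUFFER_LOAD_DWORD, so it is assembled from two zero-extended 16-bit loads joined by V_OR_B32. A little-endian C++ model of that assembly (assuming, as the test does, that the 16-bit pieces are loadable at the lower alignment):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint32_t loadUnalignedDword(const uint8_t *P) {
      uint16_t Lo, Hi;
      std::memcpy(&Lo, P, sizeof(Lo));     // BUFFER_LOAD_USHORT (low half)
      std::memcpy(&Hi, P + 2, sizeof(Hi)); // BUFFER_LOAD_USHORT (high half)
      return (uint32_t)Lo | ((uint32_t)Hi << 16); // V_OR_B32
    }

    int main() {
      const uint8_t Bytes[] = {0x44, 0x33, 0x22, 0x11};
      std::printf("0x%08x\n", (unsigned)loadUnalignedDword(Bytes)); // 0x11223344
    }
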
@@ -7,9 +7,7 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
 ; SI-LABEL: @private_access_f64_alloca:
 
 ; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX2
 
 ; SI-PROMOTE: DS_WRITE_B64
 ; SI-PROMOTE: DS_READ_B64
@@ -27,11 +25,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
 ; SI-LABEL: @private_access_v2f64_alloca:
 
 ; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX4
 
 ; SI-PROMOTE: DS_WRITE_B32
 ; SI-PROMOTE: DS_WRITE_B32
@@ -55,9 +49,7 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
 ; SI-LABEL: @private_access_i64_alloca:
 
 ; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX2
 
 ; SI-PROMOTE: DS_WRITE_B64
 ; SI-PROMOTE: DS_READ_B64
@@ -75,11 +67,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
 ; SI-LABEL: @private_access_v2i64_alloca:
 
 ; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX4
 
 ; SI-PROMOTE: DS_WRITE_B32
 ; SI-PROMOTE: DS_WRITE_B32