R600/SI: Add support for i8 and i16 private loads/stores
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199823 91177308-0d34-0410-b5e6-96231b3b80d8
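The lowering added in this patch models private (scratch) memory as whole 32-bit registers, so an i8 or i16 access becomes dword-sized register traffic plus shift/mask arithmetic: a sub-dword load reads the containing dword with REGISTER_LOAD, shifts the wanted lane down and masks it (a SEXTLOAD additionally sign-extends with a SHL/SRA pair), and a sub-dword store is a read-modify-write of the containing dword. A minimal C++ sketch of that arithmetic follows; the function names and the priv array are illustrative only and are not part of the patch.

// Illustrative sketch (not part of the patch): plain C++ equivalent of the
// byte-in-dword arithmetic emitted for i8/i16 private accesses, where
// private memory is addressed as whole 32-bit dwords.
#include <cstdint>

// Extending load: mask is 0xff for i8, 0xffff for i16; memBits is 8 or 16.
uint32_t load_subword(const uint32_t *priv, uint32_t byteAddr,
                      uint32_t mask, unsigned memBits, bool isSext) {
  uint32_t dword = priv[byteAddr >> 2];        // REGISTER_LOAD of (ptr >> 2)
  uint32_t shift = (byteAddr & 3) * 8;         // bit offset of the lane
  uint32_t val = (dword >> shift) & mask;      // SRL + AND
  if (isSext) {                                // SEXTLOAD: SHL then SRA
    unsigned s = 32 - memBits;
    val = (uint32_t)(((int32_t)(val << s)) >> s);
  }
  return val;
}

// Truncating store: read-modify-write of the containing dword.
void store_subword(uint32_t *priv, uint32_t byteAddr,
                   uint32_t value, uint32_t mask) {
  uint32_t shift = (byteAddr & 3) * 8;
  uint32_t dword = priv[byteAddr >> 2];        // REGISTER_LOAD
  dword &= ~(mask << shift);                   // clear the destination lane
  dword |= (value & mask) << shift;            // insert the new bits
  priv[byteAddr >> 2] = dword;                 // REGISTER_STORE
}

The mask constants match the 0xff/0xffff values selected in the LowerLOAD/LowerSTORE changes below.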
@@ -589,18 +589,96 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
 }
 
+SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  ISD::LoadExtType ExtType = Load->getExtensionType();
+
+  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
+      ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
+    return SDValue();
+
+
+  EVT VT = Op.getValueType();
+  EVT MemVT = Load->getMemoryVT();
+  unsigned Mask = 0;
+  if (Load->getMemoryVT() == MVT::i8) {
+    Mask = 0xff;
+  } else if (Load->getMemoryVT() == MVT::i16) {
+    Mask = 0xffff;
+  }
+  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
+                            DAG.getConstant(2, MVT::i32));
+  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+                            Load->getChain(), Ptr,
+                            DAG.getTargetConstant(0, MVT::i32),
+                            Op.getOperand(2));
+  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
+                                Load->getBasePtr(),
+                                DAG.getConstant(0x3, MVT::i32));
+  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                 DAG.getConstant(3, MVT::i32));
+  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
+  Ret = DAG.getNode(ISD::AND, DL, MVT::i32, Ret,
+                    DAG.getConstant(Mask, MVT::i32));
+  if (ExtType == ISD::SEXTLOAD) {
+    SDValue SExtShift = DAG.getConstant(
+        VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
+    Ret = DAG.getNode(ISD::SHL, DL, MVT::i32, Ret, SExtShift);
+    Ret = DAG.getNode(ISD::SRA, DL, MVT::i32, Ret, SExtShift);
+  }
+
+  return Ret;
+}
+
 SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
   if (Result.getNode()) {
     return Result;
   }
 
   StoreSDNode *Store = cast<StoreSDNode>(Op);
+  SDValue Chain = Store->getChain();
   if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
        Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      Store->getValue().getValueType().isVector()) {
     return SplitVectorStore(Op, DAG);
   }
 
+  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
+      Store->getMemoryVT().bitsLT(MVT::i32)) {
+    unsigned Mask = 0;
+    if (Store->getMemoryVT() == MVT::i8) {
+      Mask = 0xff;
+    } else if (Store->getMemoryVT() == MVT::i16) {
+      Mask = 0xffff;
+    }
+    SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
+    SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+                              DAG.getConstant(2, MVT::i32));
+    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                              Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
+    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, TruncPtr,
+                                  DAG.getConstant(0x3, MVT::i32));
+    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                   DAG.getConstant(3, MVT::i32));
+    SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
+                                    Store->getValue());
+    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, SExtValue,
+                                      DAG.getConstant(Mask, MVT::i32));
+    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                       MaskedValue, ShiftAmt);
+    SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
+                                  ShiftAmt);
+    DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
+                          DAG.getConstant(0xffffffff, MVT::i32));
+    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+
+    SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+    return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+                       Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
+  }
   return SDValue();
 }
 
@@ -54,6 +54,7 @@ protected:
   /// \brief Split a vector load into multiple scalar loads.
   SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
   SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   bool isHWTrueValue(SDValue Op) const;
   bool isHWFalseValue(SDValue Op) const;
 
@@ -1113,6 +1113,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     return SDValue();
   }
 
+  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+  if (Ret.getNode()) {
+    return Ret;
+  }
   // Lowering for indirect addressing
 
   const MachineFunction &MF = DAG.getMachineFunction();
@@ -1204,6 +1208,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   SDValue Ptr = Op.getOperand(1);
   SDValue LoweredLoad;
 
+  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+  if (Ret.getNode()) {
+    SDValue Ops[2];
+    Ops[0] = Ret;
+    Ops[1] = Chain;
+    return DAG.getMergeValues(Ops, 2, DL);
+  }
+
+
   if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
     SDValue MergedValues[2] = {
       SplitVectorLoad(Op, DAG),
 
@@ -125,11 +125,17 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
 
+  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
@@ -700,21 +706,26 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
+  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+  SDValue MergedValues[2];
+  MergedValues[1] = Load->getChain();
+  if (Ret.getNode()) {
+    MergedValues[0] = Ret;
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
 
-  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
     return SDValue();
+  }
 
   SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                             DAG.getConstant(2, MVT::i32));
-  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+  Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                     Load->getChain(), Ptr,
                     DAG.getTargetConstant(0, MVT::i32),
                     Op.getOperand(2));
-  SDValue MergedValues[2] = {
-    Ret,
-    Load->getChain()
-  };
+  MergedValues[0] = Ret;
   return DAG.getMergeValues(MergedValues, 2, DL);
 
 }
@@ -796,7 +807,34 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Store->getChain();
   SmallVector<SDValue, 8> Values;
 
-  if (VT == MVT::i64) {
+  if (Store->isTruncatingStore()) {
+    unsigned Mask = 0;
+    if (Store->getMemoryVT() == MVT::i8) {
+      Mask = 0xff;
+    } else if (Store->getMemoryVT() == MVT::i16) {
+      Mask = 0xffff;
+    }
+    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                              Chain, Store->getBasePtr(),
+                              DAG.getConstant(0, MVT::i32));
+    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
+                                  DAG.getConstant(0x3, MVT::i32));
+    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                   DAG.getConstant(3, MVT::i32));
+    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
+                                      DAG.getConstant(Mask, MVT::i32));
+    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                       MaskedValue, ShiftAmt);
+    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
+    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
+                                  DAG.getConstant(Mask, MVT::i32),
+                                  RotrAmt);
+    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+
+    Values.push_back(Dst);
+  } else if (VT == MVT::i64) {
     for (unsigned i = 0; i < 2; ++i) {
       Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                        Store->getValue(), DAG.getConstant(i, MVT::i32)));
 
@@ -2,7 +2,7 @@
 
 ; EG-LABEL: @anyext_load_i8:
 ; EG: AND_INT
-; EG-NEXT: 255
+; EG: 255
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32 addrspace(1)* %cast, align 1
@@ -14,8 +14,9 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
 
 ; EG-LABEL: @anyext_load_i16:
 ; EG: AND_INT
-; EG: LSHL
-; EG: 65535
+; EG: AND_INT
+; EG-DAG: 65535
+; EG-DAG: -65536
 define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32 addrspace(1)* %cast, align 1
@@ -27,7 +28,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
 
 ; EG-LABEL: @anyext_load_lds_i8:
 ; EG: AND_INT
-; EG-NEXT: 255
+; EG: 255
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32 addrspace(3)* %cast, align 1
@@ -39,8 +40,9 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
 
 ; EG-LABEL: @anyext_load_lds_i16:
 ; EG: AND_INT
-; EG: LSHL
-; EG: 65535
+; EG: AND_INT
+; EG-DAG: 65535
+; EG-DAG: -65536
 define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32 addrspace(3)* %cast, align 1
 
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
 ; This test checks that uses and defs of the AR register happen in the same
 ; instruction clause.
 
-; R600-CHECK-LABEL: @mova_same_clause
+; FUNC-LABEL: @mova_same_clause
+
 ; R600-CHECK: MOVA_INT
 ; R600-CHECK-NOT: ALU clause
 ; R600-CHECK: 0 + AR.x
@@ -12,7 +13,6 @@
 ; R600-CHECK-NOT: ALU clause
 ; R600-CHECK: 0 + AR.x
 
-; SI-CHECK-LABEL: @mova_same_clause
 ; SI-CHECK: V_READFIRSTLANE
 ; SI-CHECK: V_MOVRELD
 ; SI-CHECK: S_CBRANCH
@@ -46,9 +46,8 @@ entry:
 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
 ; this.
 
-; R600-CHECK-LABEL: @multiple_structs
+; FUNC-LABEL: @multiple_structs
 ; R600-CHECK-NOT: MOVA_INT
-; SI-CHECK-LABEL: @multiple_structs
 ; SI-CHECK-NOT: V_MOVREL
 %struct.point = type { i32, i32 }
 
@@ -77,9 +76,8 @@ entry:
 ; loads and stores should be lowered to copies, so there shouldn't be any
 ; MOVA instructions.
 
-; R600-CHECK-LABEL: @direct_loop
+; FUNC-LABEL: @direct_loop
 ; R600-CHECK-NOT: MOVA_INT
-; SI-CHECK-LABEL: @direct_loop
 ; SI-CHECK-NOT: V_MOVREL
 
 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -113,3 +111,48 @@ for.end:
   store i32 %value, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @short_array
+
+; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal
+; R600-CHECK: 65536
+; R600-CHECK: MOVA_INT
+
+; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 65536
+; SI-CHECK: V_MOVRELS_B32_e32
+define void @short_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = alloca [2 x i16]
+  %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
+  %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
+  store i16 0, i16* %1
+  store i16 1, i16* %2
+  %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
+  %4 = load i16* %3
+  %5 = sext i16 %4 to i32
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @char_array
+
+; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal
+; R600-CHECK: 256
+; R600-CHECK: MOVA_INT
+
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 256
+; SI-CHECK: V_MOVRELS_B32_e32
+define void @char_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = alloca [2 x i8]
+  %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
+  %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
+  store i8 0, i8* %1
+  store i8 1, i8* %2
+  %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
+  %4 = load i8* %3
+  %5 = sext i8 %4 to i32
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+
+}