mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-09-28 07:17:32 +00:00
R600/SI: Improve legalization of vector operations
This should fix hangs in the OpenCL piglit tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188431 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -79,8 +79,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
|||||||
setOperationAction(ISD::LOAD, MVT::f64, Promote);
|
setOperationAction(ISD::LOAD, MVT::f64, Promote);
|
||||||
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
|
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
|
||||||
|
|
||||||
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand);
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
|
||||||
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand);
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
|
||||||
|
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
|
||||||
|
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
|
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
|
||||||
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
|
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
|
||||||
@@ -182,6 +184,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||||||
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
|
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
|
||||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||||
// AMDGPU DAG lowering
|
// AMDGPU DAG lowering
|
||||||
|
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
|
||||||
|
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
|
||||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
||||||
}
|
}
|
||||||
@@ -208,6 +212,47 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
|||||||
return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
|
return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Append \p Count scalar elements of the vector \p Op to \p Args, beginning
/// at element index \p Start.
///
/// Every element is produced as an ISD::EXTRACT_VECTOR_ELT node whose result
/// type is the element type of \p Op and whose index operand is an i32
/// constant. Elements are appended in ascending index order; \p Args is only
/// grown, never cleared.
void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &Args,
                                         unsigned Start,
                                         unsigned Count) const {
  // Nothing requested: leave Args untouched and create no nodes.
  if (Count == 0)
    return;

  // Both the debug location and the scalar type are the same for every
  // extracted element, so compute them once.
  const SDLoc DL(Op);
  const EVT EltVT = Op.getValueType().getVectorElementType();

  for (unsigned Offset = 0; Offset != Count; ++Offset) {
    SDValue Index = DAG.getConstant(Start + Offset, MVT::i32);
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, Index));
  }
}
|
||||||
|
|
||||||
|
/// Custom lowering for ISD::CONCAT_VECTORS.
///
/// Every source vector of \p Op is broken into its scalar elements (via
/// ExtractVectorElements), and the scalars are reassembled, in operand order,
/// into one ISD::BUILD_VECTOR of the node's result type.
///
/// \param Op  The CONCAT_VECTORS node to lower.
/// \param DAG The selection DAG the new nodes are created in.
/// \returns   The replacement BUILD_VECTOR node.
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SmallVector<SDValue, 8> Args;

  // CONCAT_VECTORS is an N-ary node, so walk every operand instead of
  // assuming there are exactly two; otherwise trailing sources would be
  // dropped and the BUILD_VECTOR would have too few elements.
  for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
    SDValue Src = Op.getOperand(I);
    ExtractVectorElements(Src, DAG, Args, 0,
                          Src.getValueType().getVectorNumElements());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
                     &Args[0], Args.size());
}
|
||||||
|
|
||||||
|
/// Custom lowering for ISD::EXTRACT_SUBVECTOR.
///
/// Operand 0 is the source vector and operand 1 is read as a constant giving
/// the index of the first element to take. As many elements as the result
/// type holds are extracted one by one and recombined into an
/// ISD::BUILD_VECTOR of the result type.
///
/// \param Op  The EXTRACT_SUBVECTOR node to lower.
/// \param DAG The selection DAG the new nodes are created in.
/// \returns   The replacement BUILD_VECTOR node.
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  EVT ResultVT = Op.getValueType();
  SDValue SrcVec = Op.getOperand(0);

  // cast<> asserts that the start index is a compile-time constant.
  ConstantSDNode *StartNode = cast<ConstantSDNode>(Op.getOperand(1));
  unsigned FirstElt = StartNode->getZExtValue();
  unsigned NumElts = ResultVT.getVectorNumElements();

  SmallVector<SDValue, 8> Elts;
  ExtractVectorElements(SrcVec, DAG, Elts, FirstElt, NumElts);

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), ResultVT,
                     &Elts[0], Elts.size());
}
|
||||||
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||||
|
@@ -25,6 +25,11 @@ class MachineRegisterInfo;
|
|||||||
|
|
||||||
class AMDGPUTargetLowering : public TargetLowering {
|
class AMDGPUTargetLowering : public TargetLowering {
|
||||||
private:
|
private:
|
||||||
|
void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
|
||||||
|
SmallVectorImpl<SDValue> &Args,
|
||||||
|
unsigned Start, unsigned Count) const;
|
||||||
|
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
@@ -34,9 +34,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||||||
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
|
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
|
||||||
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
|
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
|
||||||
|
|
||||||
addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
|
|
||||||
addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
|
|
||||||
|
|
||||||
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
|
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
|
||||||
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
|
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
|
||||||
|
|
||||||
@@ -110,6 +107,9 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
|
|||||||
return VT.bitsGT(MVT::i32);
|
return VT.bitsGT(MVT::i32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when \p VT is no wider than i8, false for anything wider.
/// NOTE(review): presumably consulted by type legalization to choose between
/// splitting and promoting vectors with such elements - confirm against the
/// TargetLowering hook this overrides.
bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
  const bool IsByteOrNarrower = VT.bitsLE(MVT::i8);
  return IsByteOrNarrower;
}
|
||||||
|
|
||||||
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
|
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
|
||||||
SDLoc DL, SDValue Chain,
|
SDLoc DL, SDValue Chain,
|
||||||
|
@@ -47,6 +47,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
|||||||
public:
|
public:
|
||||||
SITargetLowering(TargetMachine &tm);
|
SITargetLowering(TargetMachine &tm);
|
||||||
bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
|
bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
|
||||||
|
virtual bool shouldSplitVectorElementType(EVT VT) const;
|
||||||
|
|
||||||
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||||
bool isVarArg,
|
bool isVarArg,
|
||||||
|
111
test/CodeGen/R600/si-vector-hang.ll
Normal file
111
test/CodeGen/R600/si-vector-hang.ll
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
|
; XXX: Mark this test as XFAIL until buffer stores are implemented
|
||||||
|
; XFAIL: *
|
||||||
|
; CHECK: @test_8_min_char
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; CHECK: BUFFER_STORE_BYTE
|
||||||
|
; ModuleID = 'radeon'
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
|
||||||
|
target triple = "r600--"
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
; Test kernel: reads eight i8 values from %in0 and eight from %in1 (one scalar
; load each), assembles them into <8 x i8> vectors, takes the element-wise
; signed minimum (icmp slt + select), and writes the eight result bytes to
; %out with one scalar store per byte.
define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
|
||||||
|
entry:
|
||||||
|
; Bytes 0-3 of %in0 go into lanes 0-3 of %7.
%0 = load i8 addrspace(1)* %in0, align 1, !tbaa !9
|
||||||
|
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
|
||||||
|
%arrayidx2.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 1
|
||||||
|
%2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1, !tbaa !9
|
||||||
|
%3 = insertelement <8 x i8> %1, i8 %2, i32 1
|
||||||
|
%arrayidx6.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 2
|
||||||
|
%4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1, !tbaa !9
|
||||||
|
%5 = insertelement <8 x i8> %3, i8 %4, i32 2
|
||||||
|
%arrayidx10.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 3
|
||||||
|
%6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1, !tbaa !9
|
||||||
|
%7 = insertelement <8 x i8> %5, i8 %6, i32 3
|
||||||
|
; Bytes 4-7 of %in0 go into lanes 0-3 of a second vector, %15.
%arrayidx.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 4
|
||||||
|
%8 = load i8 addrspace(1)* %arrayidx.i.i, align 1, !tbaa !9
|
||||||
|
%9 = insertelement <8 x i8> undef, i8 %8, i32 0
|
||||||
|
%arrayidx2.i9.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 5
|
||||||
|
%10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1, !tbaa !9
|
||||||
|
%11 = insertelement <8 x i8> %9, i8 %10, i32 1
|
||||||
|
%arrayidx6.i11.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 6
|
||||||
|
%12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1, !tbaa !9
|
||||||
|
%13 = insertelement <8 x i8> %11, i8 %12, i32 2
|
||||||
|
%arrayidx10.i13.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 7
|
||||||
|
%14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1, !tbaa !9
|
||||||
|
%15 = insertelement <8 x i8> %13, i8 %14, i32 3
|
||||||
|
; Mask indices 0-3 pick lanes 0-3 of %7 and 8-11 pick lanes 0-3 of %15,
; giving all eight %in0 bytes in order.
%vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||||
|
; Same construction for the eight bytes of %in1.
%16 = load i8 addrspace(1)* %in1, align 1, !tbaa !9
|
||||||
|
%17 = insertelement <8 x i8> undef, i8 %16, i32 0
|
||||||
|
%arrayidx2.i.i4 = getelementptr inbounds i8 addrspace(1)* %in1, i64 1
|
||||||
|
%18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1, !tbaa !9
|
||||||
|
%19 = insertelement <8 x i8> %17, i8 %18, i32 1
|
||||||
|
%arrayidx6.i.i5 = getelementptr inbounds i8 addrspace(1)* %in1, i64 2
|
||||||
|
%20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1, !tbaa !9
|
||||||
|
%21 = insertelement <8 x i8> %19, i8 %20, i32 2
|
||||||
|
%arrayidx10.i.i6 = getelementptr inbounds i8 addrspace(1)* %in1, i64 3
|
||||||
|
%22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1, !tbaa !9
|
||||||
|
%23 = insertelement <8 x i8> %21, i8 %22, i32 3
|
||||||
|
%arrayidx.i.i7 = getelementptr inbounds i8 addrspace(1)* %in1, i64 4
|
||||||
|
%24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1, !tbaa !9
|
||||||
|
%25 = insertelement <8 x i8> undef, i8 %24, i32 0
|
||||||
|
%arrayidx2.i9.i8 = getelementptr inbounds i8 addrspace(1)* %in1, i64 5
|
||||||
|
%26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1, !tbaa !9
|
||||||
|
%27 = insertelement <8 x i8> %25, i8 %26, i32 1
|
||||||
|
%arrayidx6.i11.i9 = getelementptr inbounds i8 addrspace(1)* %in1, i64 6
|
||||||
|
%28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1, !tbaa !9
|
||||||
|
%29 = insertelement <8 x i8> %27, i8 %28, i32 2
|
||||||
|
%arrayidx10.i13.i10 = getelementptr inbounds i8 addrspace(1)* %in1, i64 7
|
||||||
|
%30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1, !tbaa !9
|
||||||
|
%31 = insertelement <8 x i8> %29, i8 %30, i32 3
|
||||||
|
%vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||||
|
; Element-wise signed minimum: keep the %in0 lane where it is less than the
; %in1 lane, otherwise the %in1 lane.
%cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
|
||||||
|
%cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11
|
||||||
|
; Store lane k of the result to %out + k, for k = 0..7.
%32 = extractelement <8 x i8> %cond.i, i32 0
|
||||||
|
store i8 %32, i8 addrspace(1)* %out, align 1, !tbaa !9
|
||||||
|
%33 = extractelement <8 x i8> %cond.i, i32 1
|
||||||
|
%arrayidx2.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 1
|
||||||
|
store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1, !tbaa !9
|
||||||
|
%34 = extractelement <8 x i8> %cond.i, i32 2
|
||||||
|
%arrayidx.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 2
|
||||||
|
store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1, !tbaa !9
|
||||||
|
%35 = extractelement <8 x i8> %cond.i, i32 3
|
||||||
|
%arrayidx2.i6.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 3
|
||||||
|
store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1, !tbaa !9
|
||||||
|
%arrayidx.i.i3 = getelementptr inbounds i8 addrspace(1)* %out, i64 4
|
||||||
|
%36 = extractelement <8 x i8> %cond.i, i32 4
|
||||||
|
store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1, !tbaa !9
|
||||||
|
%37 = extractelement <8 x i8> %cond.i, i32 5
|
||||||
|
%arrayidx2.i.i6.i = getelementptr inbounds i8 addrspace(1)* %out, i64 5
|
||||||
|
store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1, !tbaa !9
|
||||||
|
%38 = extractelement <8 x i8> %cond.i, i32 6
|
||||||
|
%arrayidx.i.i7.i = getelementptr inbounds i8 addrspace(1)* %out, i64 6
|
||||||
|
store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1, !tbaa !9
|
||||||
|
%39 = extractelement <8 x i8> %cond.i, i32 7
|
||||||
|
%arrayidx2.i6.i8.i = getelementptr inbounds i8 addrspace(1)* %out, i64 7
|
||||||
|
store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1, !tbaa !9
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
|
||||||
|
!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
|
||||||
|
|
||||||
|
!0 = metadata !{null}
|
||||||
|
!1 = metadata !{null}
|
||||||
|
!2 = metadata !{null}
|
||||||
|
!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
|
||||||
|
!4 = metadata !{null}
|
||||||
|
!5 = metadata !{null}
|
||||||
|
!6 = metadata !{null}
|
||||||
|
!7 = metadata !{null}
|
||||||
|
!8 = metadata !{null}
|
||||||
|
!9 = metadata !{metadata !"omnipotent char", metadata !10}
|
||||||
|
!10 = metadata !{metadata !"Simple C/C++ TBAA"}
|
Reference in New Issue
Block a user