diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index fbcdc6aec4f..65cdb246733 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -42,6 +42,17 @@ def CC_SI : CallingConv<[ ]>; +// Calling convention for R600 +def CC_R600 : CallingConv<[ + CCIfInReg>> +]>; + // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ CCCustom<"allocateStack"> @@ -57,5 +68,7 @@ def CC_AMDGPU : CallingConv<[ "State.getMachineFunction().getInfo()->" "ShaderType == ShaderType::COMPUTE", CCDelegateTo>, CCIf<"State.getTarget().getSubtarget()"# - ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo> + ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo>, + CCIf<"State.getTarget().getSubtarget()"# + ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo> ]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a7522b2771b..336a2991d10 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -554,51 +554,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const SDLoc DL(Op); switch(IntrinsicID) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case AMDGPUIntrinsic::R600_load_input: { - int64_t RegIndex = cast(Op.getOperand(1))->getZExtValue(); - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.addLiveIn(Reg); - return DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), Reg, VT); - } - - case AMDGPUIntrinsic::R600_interp_input: { + case AMDGPUIntrinsic::R600_interp_xy: + case AMDGPUIntrinsic::R600_interp_zw: { int slot = cast(Op.getOperand(1))->getZExtValue(); - int ijb = cast(Op.getOperand(2))->getSExtValue(); MachineSDNode *interp; - if (ijb < 0) { - const MachineFunction &MF = DAG.getMachineFunction(); - const R600InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); - interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, - MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); - return DAG.getTargetExtractSubreg( - TII->getRegisterInfo().getSubRegFromChannel(slot % 4), - DL, MVT::f32, SDValue(interp, 0)); - } + SDValue RegisterINode = Op.getOperand(2); + SDValue RegisterJNode = Op.getOperand(3); - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb); - unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1); - MRI.addLiveIn(RegisterI); - MRI.addLiveIn(RegisterJ); - SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32); - SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32); - - if (slot % 4 < 2) + if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy) interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, - MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), RegisterJNode, RegisterINode); else interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, - MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), RegisterJNode, RegisterINode); - return SDValue(interp, slot % 2); + return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, + SDValue(interp, 0), SDValue(interp, 1)); } case AMDGPUIntrinsic::R600_tex: case AMDGPUIntrinsic::R600_texc: @@ -1339,6 +1311,8 @@ SDValue R600TargetLowering::LowerFormalArguments( SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned ShaderType = MF.getInfo()->ShaderType; SmallVector LocalIns; @@ -1352,6 +1326,13 @@ SDValue R600TargetLowering::LowerFormalArguments( EVT VT = Ins[i].VT; EVT MemVT = LocalIns[i].VT; + if (ShaderType != ShaderType::COMPUTE) { + unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); + SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); + InVals.push_back(Register); + continue; + } + PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_BUFFER_0); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0734cca48b6..0b2e6ec2bc0 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -418,7 +418,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", - []>; + [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index b5cb3698f18..cd0b4193c3c 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -39,10 +39,14 @@ let TargetPrefix = "R600", isTarget = 1 in { llvm_i32_ty // coord_type_w ], [IntrNoMem]>; - def int_R600_load_input : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_R600_interp_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_interp_const : + Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_R600_interp_xy : + Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; +def int_R600_interp_zw : + Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_R600_load_texbuf : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_tex : TextureIntrinsicFloatInput; diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 75f24588c17..6b683769fe0 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -4,54 +4,54 @@ ;This test ensures that R600 backend can handle ifcvt properly ;and do not generate ALU clauses with more than 128 instructions. -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = call float @llvm.R600.load.input(i32 2) - %3 = call float @llvm.R600.load.input(i32 3) - %4 = call float @llvm.R600.load.input(i32 4) - %5 = call float @llvm.R600.load.input(i32 36) - %6 = call float @llvm.R600.load.input(i32 32) + %0 = extractelement <4 x float> %reg0, i32 0 + %1 = extractelement <4 x float> %reg0, i32 1 + %2 = extractelement <4 x float> %reg0, i32 2 + %3 = extractelement <4 x float> %reg0, i32 3 + %4 = extractelement <4 x float> %reg1, i32 0 + %5 = extractelement <4 x float> %reg9, i32 0 + %6 = extractelement <4 x float> %reg8, i32 0 %7 = fcmp ugt float %6, 0.000000e+00 %8 = select i1 %7, float %4, float %5 - %9 = call float @llvm.R600.load.input(i32 5) - %10 = call float @llvm.R600.load.input(i32 37) - %11 = call float @llvm.R600.load.input(i32 32) + %9 = extractelement <4 x float> %reg1, i32 1 + %10 = extractelement <4 x float> %reg9, i32 1 + %11 = extractelement <4 x float> %reg8, i32 0 %12 = fcmp ugt float %11, 0.000000e+00 %13 = select i1 %12, float %9, float %10 - %14 = call float @llvm.R600.load.input(i32 6) - %15 = call float @llvm.R600.load.input(i32 38) - %16 = call float @llvm.R600.load.input(i32 32) + %14 = extractelement <4 x float> %reg1, i32 2 + %15 = extractelement <4 x float> %reg9, i32 2 + %16 = extractelement <4 x float> %reg8, i32 0 %17 = fcmp ugt float %16, 0.000000e+00 %18 = select i1 %17, float %14, float %15 - %19 = call float @llvm.R600.load.input(i32 7) - %20 = call float @llvm.R600.load.input(i32 39) - %21 = call float @llvm.R600.load.input(i32 32) - %22 = call float @llvm.R600.load.input(i32 8) - %23 = call float @llvm.R600.load.input(i32 9) - %24 = call float @llvm.R600.load.input(i32 10) - %25 = call float @llvm.R600.load.input(i32 11) - %26 = call float @llvm.R600.load.input(i32 12) - %27 = call float @llvm.R600.load.input(i32 13) - %28 = call float @llvm.R600.load.input(i32 14) - %29 = call float @llvm.R600.load.input(i32 15) - %30 = call float @llvm.R600.load.input(i32 16) - %31 = call float @llvm.R600.load.input(i32 17) - %32 = call float @llvm.R600.load.input(i32 18) - %33 = call float @llvm.R600.load.input(i32 19) - %34 = call float @llvm.R600.load.input(i32 20) - %35 = call float @llvm.R600.load.input(i32 21) - %36 = call float @llvm.R600.load.input(i32 22) - %37 = call float @llvm.R600.load.input(i32 23) - %38 = call float @llvm.R600.load.input(i32 24) - %39 = call float @llvm.R600.load.input(i32 25) - %40 = call float @llvm.R600.load.input(i32 26) - %41 = call float @llvm.R600.load.input(i32 27) - %42 = call float @llvm.R600.load.input(i32 28) - %43 = call float @llvm.R600.load.input(i32 29) - %44 = call float @llvm.R600.load.input(i32 30) - %45 = call float @llvm.R600.load.input(i32 31) + %19 = extractelement <4 x float> %reg1, i32 3 + %20 = extractelement <4 x float> %reg9, i32 3 + %21 = extractelement <4 x float> %reg8, i32 0 + %22 = extractelement <4 x float> %reg2, i32 0 + %23 = extractelement <4 x float> %reg2, i32 1 + %24 = extractelement <4 x float> %reg2, i32 2 + %25 = extractelement <4 x float> %reg2, i32 3 + %26 = extractelement <4 x float> %reg3, i32 0 + %27 = extractelement <4 x float> %reg3, i32 1 + %28 = extractelement <4 x float> %reg3, i32 2 + %29 = extractelement <4 x float> %reg3, i32 3 + %30 = extractelement <4 x float> %reg4, i32 0 + %31 = extractelement <4 x float> %reg4, i32 1 + %32 = extractelement <4 x float> %reg4, i32 2 + %33 = extractelement <4 x float> %reg4, i32 3 + %34 = extractelement <4 x float> %reg5, i32 0 + %35 = extractelement <4 x float> %reg5, i32 1 + %36 = extractelement <4 x float> %reg5, i32 2 + %37 = extractelement <4 x float> %reg5, i32 3 + %38 = extractelement <4 x float> %reg6, i32 0 + %39 = extractelement <4 x float> %reg6, i32 1 + %40 = extractelement <4 x float> %reg6, i32 2 + %41 = extractelement <4 x float> %reg6, i32 3 + %42 = extractelement <4 x float> %reg7, i32 0 + %43 = extractelement <4 x float> %reg7, i32 1 + %44 = extractelement <4 x float> %reg7, i32 2 + %45 = extractelement <4 x float> %reg7, i32 3 %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %47 = extractelement <4 x float> %46, i32 0 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) @@ -1146,9 +1146,6 @@ ENDIF178: ; preds = %ENDIF175, %IF179 ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll index 8dcd450debf..99f0d99b352 100644 --- a/test/CodeGen/R600/complex-folding.ll +++ b/test/CodeGen/R600/complex-folding.ll @@ -2,9 +2,9 @@ ; CHECK: @main ; CHECK-NOT: MOV -define void @main() { +define void @main(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) + %0 = extractelement <4 x float> %reg0, i32 0 %1 = call float @fabs(float %0) %2 = fptoui float %1 to i32 %3 = bitcast i32 %2 to float @@ -13,6 +13,7 @@ entry: ret void } -declare float @llvm.R600.load.input(i32) readnone declare float @fabs(float ) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) \ No newline at end of file +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 877d69a65b4..67e86c41fdc 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -2,15 +2,15 @@ ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @floor(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @floor(float) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index 75e65d8133a..935e35123f4 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -2,18 +2,18 @@ ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = call float @llvm.R600.load.input(i32 2) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = extractelement <4 x float> %reg0, i32 2 %r3 = fmul float %r0, %r1 - %r4 = fadd float %r3, %r2 - call void @llvm.AMDGPU.store.output(float %r4, i32 0) + %r4 = fadd float %r3, %r2 + %vec = insertelement <4 x float> undef, float %r4, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @fabs(float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index be25c9ce8d8..d7127f485c7 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -2,15 +2,16 @@ ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp oge float %r0, %r1 %r3 = select i1 %r2, float %r0, float %r1 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index 5e34b7c8902..defa8c09638 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -2,15 +2,16 @@ ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp uge float %r0, %r1 %r3 = select i1 %r2, float %r1, float %r0 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index cc0732b3fff..83b56a5029d 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -2,16 +2,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.AMDGPU.mul(float ,float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 8fb4559f89d..aaf2305dd0b 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.cos.f32(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.cos.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 0f51cf46f59..b587d2b2aea 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -4,16 +4,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.pow.f32( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.pow.f32(float ,float ) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index e94c2ba56b8..9eb998315fe 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.sin.f32( float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.sin.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll index aff2a6e18f7..ca86d0e3690 100644 --- a/test/CodeGen/R600/load-input-fold.ll +++ b/test/CodeGen/R600/load-input-fold.ll @@ -1,20 +1,20 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 %12 = load <4 x float> addrspace(8)* null %13 = extractelement <4 x float> %12, i32 0 %14 = fmul float %0, %13 @@ -95,9 +95,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll index c31b7c06bbe..65a6d2b5fc9 100644 --- a/test/CodeGen/R600/max-literals.ll +++ b/test/CodeGen/R600/max-literals.ll @@ -3,13 +3,13 @@ ; CHECK: @main ; CHECK: ADD * -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -32,13 +32,13 @@ main_body: ; CHECK: @main ; CHECK-NOT: ADD * -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -59,7 +59,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll index 03fc204559b..e5615b99728 100644 --- a/test/CodeGen/R600/pv-packing.ll +++ b/test/CodeGen/R600/pv-packing.ll @@ -3,17 +3,17 @@ ;CHECK: DOT4 T{{[0-9]\.X}} ;CHECK: MULADD_IEEE * T{{[0-9]\.W}} -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 8) - %4 = call float @llvm.R600.load.input(i32 9) - %5 = call float @llvm.R600.load.input(i32 10) - %6 = call float @llvm.R600.load.input(i32 12) - %7 = call float @llvm.R600.load.input(i32 13) - %8 = call float @llvm.R600.load.input(i32 14) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg2, i32 0 + %4 = extractelement <4 x float> %reg2, i32 1 + %5 = extractelement <4 x float> %reg2, i32 2 + %6 = extractelement <4 x float> %reg3, i32 0 + %7 = extractelement <4 x float> %reg3, i32 1 + %8 = extractelement <4 x float> %reg3, i32 2 %9 = load <4 x float> addrspace(8)* null %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9) @@ -35,9 +35,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 @@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 6d9396cb7d1..5a930b29268 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -3,36 +3,36 @@ ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) ;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) - %12 = call float @llvm.R600.load.input(i32 16) - %13 = call float @llvm.R600.load.input(i32 17) - %14 = call float @llvm.R600.load.input(i32 18) - %15 = call float @llvm.R600.load.input(i32 19) - %16 = call float @llvm.R600.load.input(i32 20) - %17 = call float @llvm.R600.load.input(i32 21) - %18 = call float @llvm.R600.load.input(i32 22) - %19 = call float @llvm.R600.load.input(i32 23) - %20 = call float @llvm.R600.load.input(i32 24) - %21 = call float @llvm.R600.load.input(i32 25) - %22 = call float @llvm.R600.load.input(i32 26) - %23 = call float @llvm.R600.load.input(i32 27) - %24 = call float @llvm.R600.load.input(i32 28) - %25 = call float @llvm.R600.load.input(i32 29) - %26 = call float @llvm.R600.load.input(i32 30) - %27 = call float @llvm.R600.load.input(i32 31) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 + %12 = extractelement <4 x float> %reg4, i32 0 + %13 = extractelement <4 x float> %reg4, i32 1 + %14 = extractelement <4 x float> %reg4, i32 2 + %15 = extractelement <4 x float> %reg4, i32 3 + %16 = extractelement <4 x float> %reg5, i32 0 + %17 = extractelement <4 x float> %reg5, i32 1 + %18 = extractelement <4 x float> %reg5, i32 2 + %19 = extractelement <4 x float> %reg5, i32 3 + %20 = extractelement <4 x float> %reg6, i32 0 + %21 = extractelement <4 x float> %reg6, i32 1 + %22 = extractelement <4 x float> %reg6, i32 2 + %23 = extractelement <4 x float> %reg6, i32 3 + %24 = extractelement <4 x float> %reg7, i32 0 + %25 = extractelement <4 x float> %reg7, i32 1 + %26 = extractelement <4 x float> %reg7, i32 2 + %27 = extractelement <4 x float> %reg7, i32 3 %28 = load <4 x float> addrspace(8)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 @@ -218,9 +218,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll index 6ef3c3124b8..b760c882f4e 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/R600/r600-encoding.ll @@ -10,15 +10,16 @@ ; R600-CHECK: @test ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] -define void @test() { +define void @test(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = fmul float %0, %1 - call void @llvm.AMDGPU.store.output(float %2, i32 0) + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fmul float %r0, %r1 + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll index 78c703b74e2..73bc0635ab2 100644 --- a/test/CodeGen/R600/r600-export-fix.ll +++ b/test/CodeGen/R600/r600-export-fix.ll @@ -10,12 +10,12 @@ ;CHECK: EXPORT T{{[0-9]}}.0000 -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %5, %0 @@ -137,10 +137,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll index 895ad5e1c8d..6dee3ef89cf 100644 --- a/test/CodeGen/R600/r600cfg.ll +++ b/test/CodeGen/R600/r600cfg.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = bitcast float %0 to i32 %5 = icmp eq i32 %4, 0 %6 = sext i1 %5 to i32 @@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47 br label %LOOP47 } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll index 27839296703..b4ac47afced 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/R600/reciprocal.ll @@ -2,15 +2,14 @@ ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = fdiv float 1.0, %r0 - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) - -declare float @llvm.AMDGPU.rcp(float ) readnone +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll index 474d6ba9026..c3fd923e459 100644 --- a/test/CodeGen/R600/rv7x0_count3.ll +++ b/test/CodeGen/R600/rv7x0_count3.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s -; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80] +; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80] -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %1 = call float @llvm.R600.load.input(i32 4) - %2 = call float @llvm.R600.load.input(i32 5) - %3 = call float @llvm.R600.load.input(i32 6) - %4 = call float @llvm.R600.load.input(i32 7) +define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { + %1 = extractelement <4 x float> %reg1, i32 0 + %2 = extractelement <4 x float> %reg1, i32 1 + %3 = extractelement <4 x float> %reg1, i32 2 + %4 = extractelement <4 x float> %reg1, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #1 = { readnone } + +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll index 2a660943098..11e8f5176f4 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 { main_body: - %0 = call float @llvm.R600.interp.input(i32 0, i32 0) - %1 = call float @llvm.R600.interp.input(i32 1, i32 0) - %2 = call float @llvm.R600.interp.input(i32 2, i32 0) - %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %1, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE br label %ENDIF } -declare float @llvm.R600.interp.input(i32, i32) #0 - declare float @llvm.AMDIL.clamp.(float, float, float) #0 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { readnone } +attributes #1 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll index 44b7c2f6800..33b20d36737 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %0, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16 br label %LOOP } -declare float @llvm.R600.load.input(i32) #0 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #0 = { readnone } +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll index c49b5f4bd15..0484fc9a856 100644 --- a/test/CodeGen/R600/shared-op-cycle.ll +++ b/test/CodeGen/R600/shared-op-cycle.ll @@ -4,10 +4,10 @@ ; CHECK: MULADD_IEEE * ; CHECK-NOT: MULADD_IEEE * -define void @main() { - %w0 = call float @llvm.R600.load.input(i32 3) - %w1 = call float @llvm.R600.load.input(i32 7) - %w2 = call float @llvm.R600.load.input(i32 11) +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { + %w0 = extractelement <4 x float> %reg0, i32 3 + %w1 = extractelement <4 x float> %reg1, i32 3 + %w2 = extractelement <4 x float> %reg2, i32 3 %sq0 = fmul float %w0, %w0 %r0 = fadd float %sq0, 2.0 %sq1 = fmul float %w1, %w1 @@ -23,16 +23,10 @@ define void @main() { ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } \ No newline at end of file +attributes #1 = { readnone } \ No newline at end of file diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index 9a58f667f0d..16c3f191935 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -6,12 +6,12 @@ ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX ;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* null %5 = extractelement <4 x float> %4, i32 1 %6 = load <4 x float> addrspace(8)* null @@ -96,12 +96,12 @@ main_body: ; EG-CHECK: T{{[0-9]+}}.XY__ ; EG-CHECK: T{{[0-9]+}}.YXZ0 -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = fadd float %0, 2.5 + %3 = fmul float %1, 3.5 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 0 %6 = call float @llvm.cos.f32(float %5) @@ -109,8 +109,8 @@ main_body: %8 = extractelement <4 x float> %7, i32 0 %9 = load <4 x float> addrspace(8)* null %10 = extractelement <4 x float> %9, i32 1 - %11 = insertelement <4 x float> undef, float %0, i32 0 - %12 = insertelement <4 x float> %11, float %1, i32 1 + %11 = insertelement <4 x float> undef, float %2, i32 0 + %12 = insertelement <4 x float> %11, float %3, i32 1 call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1) %13 = insertelement <4 x float> undef, float %6, i32 0 %14 = insertelement <4 x float> %13, float %8, i32 1 @@ -120,14 +120,10 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: nounwind readonly -declare float @llvm.cos.f32(float) #2 +declare float @llvm.cos.f32(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { nounwind readonly } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll index 5979609ce45..cbb9c50974a 100644 --- a/test/CodeGen/R600/tex-clause-antidep.ll +++ b/test/CodeGen/R600/tex-clause-antidep.ll @@ -3,11 +3,11 @@ ;CHECK: TEX ;CHECK-NEXT: ALU -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -19,6 +19,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } \ No newline at end of file diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll index 5d0ecef3069..789538af582 100644 --- a/test/CodeGen/R600/texture-input-merge.ll +++ b/test/CodeGen/R600/texture-input-merge.ll @@ -2,11 +2,11 @@ ;CHECK-NOT: MOV -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = fmul float %1, 3.0 %6 = fmul float %2, 3.0 %7 = fmul float %3, 3.0 @@ -25,6 +25,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } \ No newline at end of file