From 45b14e341a8a85e877d001bbd43f5e2b25b61cb8 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Wed, 27 Mar 2013 09:12:51 +0000 Subject: [PATCH] R600/SI: add mulhu/mulhs patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178126 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 ++ lib/Target/R600/R600ISelLowering.cpp | 1 - lib/Target/R600/SIInstructions.td | 19 ++++++++++++++++++- test/CodeGen/R600/mulhu.ll | 16 ++++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/R600/mulhu.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 5995b6f5e80..a266df535d5 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 7bdba834a7f..53e6e51dd2b 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -28,7 +28,6 @@ using namespace llvm; R600TargetLowering::R600TargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), TII(static_cast(TM.getInstrInfo())) { - setOperationAction(ISD::MUL, MVT::i64, Expand); addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b6385509b4b..b4805212008 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -975,14 +975,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; + +let isCommutable = 1 in { + def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +} // isCommutable = 1 + def : Pat < (mul VSrc_32:$src0, VReg_32:$src1), (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) >; -def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +def : Pat < + (mulhu VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + +def : Pat < + (mulhs VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll new file mode 100644 index 00000000000..979074df2bb --- /dev/null +++ b/test/CodeGen/R600/mulhu.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 +;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 +;CHECK-NEXT: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 3 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)