R600/SI: add mulhu/mulhs patterns

Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178126 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-25 05:22:04 +00:00 · 2013-03-27 09:12:51 +00:00
parent a62b1a149a
commit 45b14e341a
4 changed files with 36 additions and 2 deletions
@@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

+  setOperationAction(ISD::MUL, MVT::i64, Expand);
+
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -28,7 +28,6 @@ using namespace llvm;
 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
-  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -975,14 +975,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
 def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
 def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
 def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
 def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
 def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
 def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // isCommutable = 1
+
 def : Pat <
  (mul VSrc_32:$src0, VReg_32:$src1),
  (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
 >;
-def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+def : Pat <
+  (mulhu VSrc_32:$src0, VReg_32:$src1),
+  (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+  (mulhs VSrc_32:$src0, VReg_32:$src1),
+  (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
 def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
 def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
 def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
+;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
+;CHECK-NEXT: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0
+
+define void @test(i32 %p) {
+   %i = udiv i32 %p, 3
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)