mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 23:31:37 +00:00
R600: Expand mul24 for GPUs without it
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
21851f9adb
commit
cb0402e9a4
@ -433,16 +433,29 @@ class UMad24Pat<Instruction Inst> : Pat <
|
||||
(Inst $src0, $src1, $src2)
|
||||
>;
|
||||
|
||||
class IMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat <
|
||||
multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
|
||||
def _expand_imad24 : Pat <
|
||||
(AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
|
||||
(AddInst (MulInst $src0, $src1), $src2)
|
||||
>;
|
||||
>;
|
||||
|
||||
class UMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat <
|
||||
def _expand_imul24 : Pat <
|
||||
(AMDGPUmul_i24 i32:$src0, i32:$src1),
|
||||
(MulInst $src0, $src1)
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
|
||||
def _expand_umad24 : Pat <
|
||||
(AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
|
||||
(AddInst (MulInst $src0, $src1), $src2)
|
||||
>;
|
||||
>;
|
||||
|
||||
def _expand_umul24 : Pat <
|
||||
(AMDGPUmul_u24 i32:$src0, i32:$src1),
|
||||
(MulInst $src0, $src1)
|
||||
>;
|
||||
}
|
||||
|
||||
include "R600Instructions.td"
|
||||
include "R700Instructions.td"
|
||||
|
@ -49,7 +49,7 @@ def COS_cm : COS_Common<0x8E>;
|
||||
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
|
||||
|
||||
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
|
||||
def : UMad24ExpandPat<MULLO_UINT_cm, ADD_INT>;
|
||||
defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>;
|
||||
|
||||
// RECIP_UINT emulation for Cayman
|
||||
// The multiplication scales from [0,1] to the unsigned integer range
|
||||
|
@ -75,8 +75,7 @@ def COS_eg : COS_Common<0x8E>;
|
||||
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
|
||||
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
|
||||
|
||||
def : IMad24ExpandPat<MULLO_INT_eg, ADD_INT>;
|
||||
def : UMad24ExpandPat<MULLO_UINT_eg, ADD_INT>;
|
||||
defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Memory read/write instructions
|
||||
|
@ -1627,8 +1627,8 @@ def : DwordAddrPat <i32, R600_Reg32>;
|
||||
|
||||
let Predicates = [isR600] in {
|
||||
// Intrinsic patterns
|
||||
def : IMad24ExpandPat<MULLO_INT_r600, ADD_INT>;
|
||||
def : UMad24ExpandPat<MULLO_UINT_r600, ADD_INT>;
|
||||
defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>;
|
||||
defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>;
|
||||
} // End isR600
|
||||
|
||||
def getLDSNoRetOp : InstrMapping {
|
||||
|
@ -1,14 +1,15 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @test_imul24
|
||||
; SI: V_MUL_I32_I24
|
||||
; CM: MUL_INT24
|
||||
; R600: MULLO_INT
|
||||
define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
|
||||
%mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
|
||||
store i32 %mul, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1,11 +1,17 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
|
||||
|
||||
; SI-LABEL: @test_umul24
|
||||
; FUNC-LABEL: @test_umul24
|
||||
; SI: V_MUL_U32_U24
|
||||
; R600: MUL_UINT24
|
||||
; R600: MULLO_UINT
|
||||
define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
|
||||
%mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone
|
||||
store i32 %mul, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user