mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-14 06:37:33 +00:00
R600/SI: Prefer selecting more e64 instruction forms.
Add some more tests to make sure better operand choices are still made. Leave alone some cases that seem to have no reason to ever be e64.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217789 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6fc71a0cfc
commit
f1b16047b7
lib/Target/R600
test/CodeGen/R600
@ -1976,9 +1976,9 @@ class BinOp64Pat <SDNode node, Instruction inst> : Pat <
|
||||
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
|
||||
>;
|
||||
|
||||
// Instantiate BinOp64Pat once per bitwise node (and, or, xor), pairing each
// node with the V_*_B32 instruction used to implement it.
def : BinOp64Pat <and, V_AND_B32_e32>;
|
||||
def : BinOp64Pat <or, V_OR_B32_e32>;
|
||||
def : BinOp64Pat <xor, V_XOR_B32_e32>;
|
||||
def : BinOp64Pat <and, V_AND_B32_e64>;
|
||||
def : BinOp64Pat <or, V_OR_B32_e64>;
|
||||
def : BinOp64Pat <xor, V_XOR_B32_e64>;
|
||||
|
||||
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
|
||||
(sext_inreg i32:$src0, vt),
|
||||
@ -1990,7 +1990,7 @@ def : SextInReg <i16, 16>;
|
||||
|
||||
// Match an i32 add whose first operand is ctpop($popcnt) and whose second
// operand is $val, and select it to one V_BCNT_U32_B32 that receives
// $popcnt and $val as its two sources.
def : Pat <
|
||||
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
|
||||
(V_BCNT_U32_B32_e32 $popcnt, $val)
|
||||
(V_BCNT_U32_B32_e64 $popcnt, $val)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
@ -2010,7 +2010,7 @@ def : Pat <
|
||||
|
||||
// Select the addc node on two i32 operands ($src0, $src1) to V_ADD_I32 with
// the same two operands.
def : Pat <
|
||||
(addc i32:$src0, i32:$src1),
|
||||
(V_ADD_I32_e32 $src0, $src1)
|
||||
(V_ADD_I32_e64 $src0, $src1)
|
||||
>;
|
||||
|
||||
/********** ======================= **********/
|
||||
@ -3070,13 +3070,13 @@ def : Pat <
|
||||
|
||||
// Truncate i32 $a to i1: AND $a with the constant 1 (V_AND_B32), then compare
// the masked value for equality with 1 (V_CMP_EQ_I32_e64).
def : Pat <
|
||||
(i1 (trunc i32:$a)),
|
||||
(V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
|
||||
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
|
||||
>;
|
||||
|
||||
//============================================================================//
|
||||
// Miscellaneous Optimization Patterns
|
||||
//============================================================================//
|
||||
|
||||
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
|
||||
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
|
||||
|
||||
} // End isSI predicate
|
||||
|
@ -129,11 +129,30 @@ endif:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @v_and_constant_i64
|
||||
; SI: V_AND_B32
|
||||
; SI: V_AND_B32
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; Test body: load an i64 from %aptr, AND it with the constant 1234567, and
; store the result to %out. The operand comes from memory, not from a
; function argument.
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%and = and i64 %a, 1234567
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Replace and 0 with mov 0
|
||||
; FUNC-LABEL: @v_and_inline_imm_i64
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
|
||||
; Test body: load an i64 from %aptr, AND it with the constant 64, and store
; the result to %out. The SI check lines for this function expect two
; V_AND_B32_e32 instructions whose immediates are 64 and 0.
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%and = and i64 %a, 64
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @s_and_inline_imm_i64
|
||||
; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
|
||||
; Test body: AND the i64 argument %a with the constant 64 and store the result
; to %out. Unlike the v_and_* tests, the operand is a function argument and is
; never loaded; %aptr is declared but not used in the body.
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 64
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ entry:
|
||||
; R600-CHECK: @bfi_sha256_ma
|
||||
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
|
||||
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
|
||||
; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
|
||||
; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
|
||||
; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
|
||||
; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
|
@ -42,8 +42,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
|
||||
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
|
||||
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
|
||||
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
|
||||
; SI-NOT: ADD
|
||||
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
|
||||
; SI: S_ENDPGM
|
||||
|
||||
@ -59,6 +58,20 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @v_ctpop_add_sgpr_i32
|
||||
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
|
||||
; SI-NEXT: S_WAITCNT
|
||||
; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
|
||||
; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
|
||||
; SI: S_ENDPGM
|
||||
; Test body: load an i32 from %in0, take its population count with
; llvm.ctpop.i32, add the i32 argument %sval, and store the sum to %out.
; %in1 is declared but not used in the body.
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
|
||||
%val0 = load i32 addrspace(1)* %in0, align 4
|
||||
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
|
||||
%add = add i32 %ctpop0, %sval
|
||||
store i32 %add, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @v_ctpop_v2i32:
|
||||
; SI: V_BCNT_U32_B32_e32
|
||||
; SI: V_BCNT_U32_B32_e32
|
||||
|
@ -4,10 +4,29 @@
|
||||
declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @rsq_f32
|
||||
; SI: V_RSQ_F32_e32
|
||||
; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
; EG: RECIPSQRT_IEEE
|
||||
; Test body: call the llvm.AMDGPU.rsq.f32 intrinsic on the float argument %src
; and store the result to %out.
define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
|
||||
%rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Really these should be constant folded
|
||||
; FUNC-LABEL: @rsq_f32_constant_4.0
|
||||
; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
|
||||
; EG: RECIPSQRT_IEEE
|
||||
; Test body: call the llvm.AMDGPU.rsq.f32 intrinsic on the constant 4.0 and
; store the result to %out. The function takes no float input; the operand is
; the literal itself.
define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
|
||||
%rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @rsq_f32_constant_100.0
|
||||
; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
|
||||
; EG: RECIPSQRT_IEEE
|
||||
; Test body: call the llvm.AMDGPU.rsq.f32 intrinsic on the constant 100.0 and
; store the result to %out. The 0x42c80000 operand expected by the SI check
; line for this function is the IEEE-754 single-precision encoding of 100.0.
define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
|
||||
%rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -26,3 +26,13 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
|
||||
store double %div, double addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @rsq_f32_sgpr
|
||||
; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_ENDPGM
|
||||
; Test body: compute 1.0 / sqrt(%val) using llvm.sqrt.f32 followed by fdiv,
; and store the quotient to %out. %val is a function argument, not a loaded
; value. The SI check line for this function expects a V_RSQ_F32_e32.
define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
|
||||
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
|
||||
%div = fdiv float 1.0, %sqrt
|
||||
store float %div, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -46,9 +46,20 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
|
||||
}
|
||||
|
||||
; SI-LABEL: @trunc_i32_to_i1:
|
||||
; SI: V_AND_B32
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; SI: V_CMP_EQ_I32
|
||||
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
|
||||
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
|
||||
%a = load i32 addrspace(1)* %ptr, align 4
|
||||
%trunc = trunc i32 %a to i1
|
||||
%result = select i1 %trunc, i32 1, i32 0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @sgpr_trunc_i32_to_i1:
|
||||
; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
|
||||
; SI: V_CMP_EQ_I32
|
||||
define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
|
||||
%trunc = trunc i32 %a to i1
|
||||
%result = select i1 %trunc, i32 1, i32 0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
|
Loading…
x
Reference in New Issue
Block a user