mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 04:33:05 +00:00
R600: Fix handling of NAN in comparison instructions
We were completely ignoring the unordered/ordered attributes of condition codes and also incorrectly lowering seto and setuo. Reviewed-by: Vincent Lejeune<vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191603 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bbafe422d6
commit
9c598cfebc
@ -43,12 +43,23 @@ def COND_EQ : PatLeaf <
|
||||
case ISD::SETEQ: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_OEQ : PatLeaf <
|
||||
(cond),
|
||||
[{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
|
||||
>;
|
||||
|
||||
def COND_NE : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETONE: case ISD::SETUNE:
|
||||
case ISD::SETNE: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_UNE : PatLeaf <
|
||||
(cond),
|
||||
[{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
|
||||
>;
|
||||
|
||||
def COND_GT : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
@ -56,6 +67,11 @@ def COND_GT : PatLeaf <
|
||||
case ISD::SETGT: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_OGT : PatLeaf <
|
||||
(cond),
|
||||
[{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
|
||||
>;
|
||||
|
||||
def COND_GE : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
@ -63,6 +79,11 @@ def COND_GE : PatLeaf <
|
||||
case ISD::SETGE: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_OGE : PatLeaf <
|
||||
(cond),
|
||||
[{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
|
||||
>;
|
||||
|
||||
def COND_LT : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
|
@ -38,10 +38,17 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
|
||||
// Set condition code actions
|
||||
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
|
||||
setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
|
||||
|
||||
|
@ -689,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
|
||||
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
|
||||
def SETE : R600_2OP <
|
||||
0x08, "SETE",
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
|
||||
>;
|
||||
|
||||
def SGT : R600_2OP <
|
||||
0x09, "SETGT",
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
|
||||
>;
|
||||
|
||||
def SGE : R600_2OP <
|
||||
0xA, "SETGE",
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
|
||||
>;
|
||||
|
||||
def SNE : R600_2OP <
|
||||
0xB, "SETNE",
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
|
||||
>;
|
||||
|
||||
def SETE_DX10 : R600_2OP <
|
||||
0xC, "SETE_DX10",
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
|
||||
>;
|
||||
|
||||
def SETGT_DX10 : R600_2OP <
|
||||
0xD, "SETGT_DX10",
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
|
||||
>;
|
||||
|
||||
def SETGE_DX10 : R600_2OP <
|
||||
0xE, "SETGE_DX10",
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
|
||||
>;
|
||||
|
||||
def SETNE_DX10 : R600_2OP <
|
||||
0xF, "SETNE_DX10",
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
|
||||
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
|
||||
>;
|
||||
|
||||
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
|
||||
@ -920,19 +920,19 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
||||
|
||||
class CNDE_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "CNDE",
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
|
||||
>;
|
||||
|
||||
class CNDGT_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "CNDGT",
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
|
||||
> {
|
||||
let Itinerary = VecALU;
|
||||
}
|
||||
|
||||
class CNDGE_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "CNDGE",
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
|
||||
> {
|
||||
let Itinerary = VecALU;
|
||||
}
|
||||
@ -2324,38 +2324,6 @@ def KIL : Pat <
|
||||
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
|
||||
>;
|
||||
|
||||
// The next two patterns are special cases for handling 'true if ordered' and
|
||||
// 'true if unordered' conditionals. The assumption here is that the behavior of
|
||||
// SETE and SNE conforms to the Direct3D 10 rules for floating point values
|
||||
// described here:
|
||||
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
|
||||
// We assume that SETE returns false when one of the operands is NAN and
|
||||
// SNE returns true when one of the operands is NAN
|
||||
|
||||
//SETE - 'true if ordered'
|
||||
def : Pat <
|
||||
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
|
||||
(SETE $src0, $src1)
|
||||
>;
|
||||
|
||||
//SETE_DX10 - 'true if ordered'
|
||||
def : Pat <
|
||||
(selectcc f32:$src0, f32:$src1, -1, 0, SETO),
|
||||
(SETE_DX10 $src0, $src1)
|
||||
>;
|
||||
|
||||
//SNE - 'true if unordered'
|
||||
def : Pat <
|
||||
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
|
||||
(SNE $src0, $src1)
|
||||
>;
|
||||
|
||||
//SETNE_DX10 - 'true if unordered'
|
||||
def : Pat <
|
||||
(selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
|
||||
(SETNE_DX10 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : Extract_Element <f32, v4f32, 0, sub0>;
|
||||
def : Extract_Element <f32, v4f32, 1, sub1>;
|
||||
def : Extract_Element <f32, v4f32, 2, sub2>;
|
||||
|
@ -5,7 +5,7 @@
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
%r2 = fcmp uge float %r0, %r1
|
||||
%r2 = fcmp oge float %r0, %r1
|
||||
%r3 = select i1 %r2, float %r0, float %r1
|
||||
call void @llvm.AMDGPU.store.output(float %r3, i32 0)
|
||||
ret void
|
||||
|
@ -10,7 +10,7 @@ main_body:
|
||||
%3 = extractelement <4 x float> %2, i32 0
|
||||
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
%6 = fcmp ult float %1, 0.000000e+00
|
||||
%6 = fcmp ogt float %1, 0.000000e+00
|
||||
%7 = select i1 %6, float %3, float %5
|
||||
%8 = load <4 x float> addrspace(8)* null
|
||||
%9 = extractelement <4 x float> %8, i32 1
|
||||
@ -18,7 +18,7 @@ main_body:
|
||||
%11 = extractelement <4 x float> %10, i32 1
|
||||
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%13 = extractelement <4 x float> %12, i32 1
|
||||
%14 = fcmp ult float %9, 0.000000e+00
|
||||
%14 = fcmp ogt float %9, 0.000000e+00
|
||||
%15 = select i1 %14, float %11, float %13
|
||||
%16 = load <4 x float> addrspace(8)* null
|
||||
%17 = extractelement <4 x float> %16, i32 2
|
||||
@ -26,7 +26,7 @@ main_body:
|
||||
%19 = extractelement <4 x float> %18, i32 2
|
||||
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%21 = extractelement <4 x float> %20, i32 2
|
||||
%22 = fcmp ult float %17, 0.000000e+00
|
||||
%22 = fcmp ogt float %17, 0.000000e+00
|
||||
%23 = select i1 %22, float %19, float %21
|
||||
%24 = load <4 x float> addrspace(8)* null
|
||||
%25 = extractelement <4 x float> %24, i32 3
|
||||
@ -34,7 +34,7 @@ main_body:
|
||||
%27 = extractelement <4 x float> %26, i32 3
|
||||
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%29 = extractelement <4 x float> %28, i32 3
|
||||
%30 = fcmp ult float %25, 0.000000e+00
|
||||
%30 = fcmp ogt float %25, 0.000000e+00
|
||||
%31 = select i1 %30, float %27, float %29
|
||||
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
|
||||
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
|
||||
@ -58,7 +58,7 @@ main_body:
|
||||
%3 = extractelement <4 x float> %2, i32 0
|
||||
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
%6 = fcmp ult float %1, 0.000000e+00
|
||||
%6 = fcmp ogt float %1, 0.000000e+00
|
||||
%7 = select i1 %6, float %3, float %5
|
||||
%8 = load <4 x float> addrspace(8)* null
|
||||
%9 = extractelement <4 x float> %8, i32 1
|
||||
@ -66,7 +66,7 @@ main_body:
|
||||
%11 = extractelement <4 x float> %10, i32 0
|
||||
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%13 = extractelement <4 x float> %12, i32 1
|
||||
%14 = fcmp ult float %9, 0.000000e+00
|
||||
%14 = fcmp ogt float %9, 0.000000e+00
|
||||
%15 = select i1 %14, float %11, float %13
|
||||
%16 = load <4 x float> addrspace(8)* null
|
||||
%17 = extractelement <4 x float> %16, i32 2
|
||||
@ -74,7 +74,7 @@ main_body:
|
||||
%19 = extractelement <4 x float> %18, i32 3
|
||||
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%21 = extractelement <4 x float> %20, i32 2
|
||||
%22 = fcmp ult float %17, 0.000000e+00
|
||||
%22 = fcmp ogt float %17, 0.000000e+00
|
||||
%23 = select i1 %22, float %19, float %21
|
||||
%24 = load <4 x float> addrspace(8)* null
|
||||
%25 = extractelement <4 x float> %24, i32 3
|
||||
@ -82,7 +82,7 @@ main_body:
|
||||
%27 = extractelement <4 x float> %26, i32 3
|
||||
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%29 = extractelement <4 x float> %28, i32 2
|
||||
%30 = fcmp ult float %25, 0.000000e+00
|
||||
%30 = fcmp ogt float %25, 0.000000e+00
|
||||
%31 = select i1 %30, float %27, float %29
|
||||
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
|
||||
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=r600 | FileCheck %s
|
||||
|
||||
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
|
||||
;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X
|
||||
;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
|
||||
|
||||
define void @main() #0 {
|
||||
main_body:
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
define void @test_a(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 0.000000e+00
|
||||
%0 = fcmp olt float %in, 0.000000e+00
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -34,7 +34,7 @@ ENDIF:
|
||||
; CHECK-NEXT: ALU clause starting
|
||||
define void @test_b(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 0.0
|
||||
%0 = fcmp olt float %in, 0.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
|
@ -30,13 +30,13 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ueq_select_fptosi
|
||||
; CHECK: @fcmp_oeq_select_fptosi
|
||||
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ueq float %in, 5.0
|
||||
%0 = fcmp oeq float %in, 5.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -44,25 +44,25 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ueq_select_i32
|
||||
; CHECK: @fcmp_oeq_select_i32
|
||||
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ueq float %in, 5.0
|
||||
%0 = fcmp oeq float %in, 5.0
|
||||
%1 = select i1 %0, i32 -1, i32 0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ugt_select_fptosi
|
||||
; CHECK: @fcmp_ogt_select_fptosi
|
||||
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ugt float %in, 5.0
|
||||
%0 = fcmp ogt float %in, 5.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -70,25 +70,25 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ugt_select_i32
|
||||
; CHECK: @fcmp_ogt_select_i32
|
||||
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ugt float %in, 5.0
|
||||
%0 = fcmp ogt float %in, 5.0
|
||||
%1 = select i1 %0, i32 -1, i32 0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_uge_select_fptosi
|
||||
; CHECK: @fcmp_oge_select_fptosi
|
||||
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp uge float %in, 5.0
|
||||
%0 = fcmp oge float %in, 5.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -96,25 +96,25 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_uge_select_i32
|
||||
; CHECK: @fcmp_oge_select_i32
|
||||
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp uge float %in, 5.0
|
||||
%0 = fcmp oge float %in, 5.0
|
||||
%1 = select i1 %0, i32 -1, i32 0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ule_select_fptosi
|
||||
; CHECK: @fcmp_ole_select_fptosi
|
||||
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ule float %in, 5.0
|
||||
%0 = fcmp ole float %in, 5.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -122,25 +122,25 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ule_select_i32
|
||||
; CHECK: @fcmp_ole_select_i32
|
||||
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ule float %in, 5.0
|
||||
%0 = fcmp ole float %in, 5.0
|
||||
%1 = select i1 %0, i32 -1, i32 0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ult_select_fptosi
|
||||
; CHECK: @fcmp_olt_select_fptosi
|
||||
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 5.0
|
||||
%0 = fcmp olt float %in, 5.0
|
||||
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
%3 = fptosi float %2 to i32
|
||||
@ -148,13 +148,13 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fcmp_ult_select_i32
|
||||
; CHECK: @fcmp_olt_select_i32
|
||||
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 5.0
|
||||
%0 = fcmp olt float %in, 5.0
|
||||
%1 = select i1 %0, i32 -1, i32 0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
; These tests are for condition codes that are not supported by the hardware
|
||||
|
||||
; CHECK: @slt
|
||||
; CHECK-LABEL: @slt
|
||||
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 5(7.006492e-45)
|
||||
@ -14,7 +14,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @ult_i32
|
||||
; CHECK-LABEL: @ult_i32
|
||||
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 5(7.006492e-45)
|
||||
@ -26,10 +26,11 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @ult_float
|
||||
; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-LABEL: @ult_float
|
||||
; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
|
||||
; CHECK-NEXT: LSHR *
|
||||
define void @ult_float(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 5.0
|
||||
@ -38,10 +39,22 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @olt
|
||||
; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
;CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
; CHECK-LABEL: @ult_float_native
|
||||
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
|
||||
; CHECK-NEXT: LSHR *
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @ult_float_native(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ult float %in, 5.0
|
||||
%1 = select i1 %0, float 0.0, float 1.0
|
||||
store float %1, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @olt
|
||||
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR *
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @olt(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp olt float %in, 5.0
|
||||
@ -50,7 +63,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @sle
|
||||
; CHECK-LABEL: @sle
|
||||
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 6(8.407791e-45)
|
||||
@ -62,7 +75,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @ule_i32
|
||||
; CHECK-LABEL: @ule_i32
|
||||
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-NEXT: 6(8.407791e-45)
|
||||
@ -74,10 +87,11 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @ule_float
|
||||
; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-LABEL: @ule_float
|
||||
; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
|
||||
; CHECK-NEXT: LSHR *
|
||||
define void @ule_float(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ule float %in, 5.0
|
||||
@ -86,9 +100,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @ole
|
||||
; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR
|
||||
; CHECK-LABEL: @ule_float_native
|
||||
; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
|
||||
; CHECK-NEXT: LSHR *
|
||||
; CHECK-NEXT: 1084227584(5.000000e+00)
|
||||
define void @ule_float_native(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fcmp ule float %in, 5.0
|
||||
%1 = select i1 %0, float 0.0, float 1.0
|
||||
store float %1, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ole
|
||||
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
|
||||
; CHECK-NEXT: LSHR *
|
||||
; CHECK-NEXT:1084227584(5.000000e+00)
|
||||
define void @ole(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
|
@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs
|
||||
entry:
|
||||
%0 = load <2 x float> addrspace(1)* %in0
|
||||
%1 = load <2 x float> addrspace(1)* %in1
|
||||
%cmp = fcmp one <2 x float> %0, %1
|
||||
%cmp = fcmp une <2 x float> %0, %1
|
||||
%result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
|
||||
store <2 x float> %result, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs
|
||||
entry:
|
||||
%0 = load <4 x float> addrspace(1)* %in0
|
||||
%1 = load <4 x float> addrspace(1)* %in1
|
||||
%cmp = fcmp one <4 x float> %0, %1
|
||||
%cmp = fcmp une <4 x float> %0, %1
|
||||
%result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
|
||||
store <4 x float> %result, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
|
Loading…
Reference in New Issue
Block a user