mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-04 21:30:49 +00:00
R600: Use new tablegen syntax for patterns
All but two patterns have been converted to the new syntax. The remaining two patterns will require COPY_TO_REGCLASS instructions, which the VLIW DAG Scheduler cannot handle.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180922 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5ed242c240
commit
399880527d
@ -116,21 +116,21 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
|||||||
(outs rc:$dst),
|
(outs rc:$dst),
|
||||||
(ins rc:$src0),
|
(ins rc:$src0),
|
||||||
"CLAMP $dst, $src0",
|
"CLAMP $dst, $src0",
|
||||||
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
[(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
||||||
(outs rc:$dst),
|
(outs rc:$dst),
|
||||||
(ins rc:$src0),
|
(ins rc:$src0),
|
||||||
"FABS $dst, $src0",
|
"FABS $dst, $src0",
|
||||||
[(set rc:$dst, (fabs rc:$src0))]
|
[(set f32:$dst, (fabs f32:$src0))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
||||||
(outs rc:$dst),
|
(outs rc:$dst),
|
||||||
(ins rc:$src0),
|
(ins rc:$src0),
|
||||||
"FNEG $dst, $src0",
|
"FNEG $dst, $src0",
|
||||||
[(set rc:$dst, (fneg rc:$src0))]
|
[(set f32:$dst, (fneg f32:$src0))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // usesCustomInserter = 1
|
} // usesCustomInserter = 1
|
||||||
@ -141,8 +141,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
|||||||
(outs dstClass:$dst),
|
(outs dstClass:$dst),
|
||||||
(ins addrClass:$addr, i32imm:$chan),
|
(ins addrClass:$addr, i32imm:$chan),
|
||||||
"RegisterLoad $dst, $addr",
|
"RegisterLoad $dst, $addr",
|
||||||
[(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
|
[(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
|
||||||
(i32 timm:$chan)))]
|
|
||||||
> {
|
> {
|
||||||
let isRegisterLoad = 1;
|
let isRegisterLoad = 1;
|
||||||
}
|
}
|
||||||
@ -151,7 +150,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
|||||||
(outs),
|
(outs),
|
||||||
(ins dstClass:$val, addrClass:$addr, i32imm:$chan),
|
(ins dstClass:$val, addrClass:$addr, i32imm:$chan),
|
||||||
"RegisterStore $val, $addr",
|
"RegisterStore $val, $addr",
|
||||||
[(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
|
[(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
|
||||||
> {
|
> {
|
||||||
let isRegisterStore = 1;
|
let isRegisterStore = 1;
|
||||||
}
|
}
|
||||||
@ -162,100 +161,96 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
|||||||
/* Generic helper patterns for intrinsics */
|
/* Generic helper patterns for intrinsics */
|
||||||
/* -------------------------------------- */
|
/* -------------------------------------- */
|
||||||
|
|
||||||
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
|
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
|
||||||
RegisterClass rc> : Pat <
|
: Pat <
|
||||||
(fpow rc:$src0, rc:$src1),
|
(fpow f32:$src0, f32:$src1),
|
||||||
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
|
(exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/* Other helper patterns */
|
/* Other helper patterns */
|
||||||
/* --------------------- */
|
/* --------------------- */
|
||||||
|
|
||||||
/* Extract element pattern */
|
/* Extract element pattern */
|
||||||
class Extract_Element <ValueType sub_type, ValueType vec_type,
|
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
|
||||||
RegisterClass vec_class, int sub_idx,
|
SubRegIndex sub_reg>
|
||||||
SubRegIndex sub_reg>: Pat<
|
: Pat<
|
||||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
|
(sub_type (vector_extract vec_type:$src, sub_idx)),
|
||||||
(EXTRACT_SUBREG vec_class:$src, sub_reg)
|
(EXTRACT_SUBREG $src, sub_reg)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/* Insert element pattern */
|
/* Insert element pattern */
|
||||||
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
||||||
RegisterClass elem_class, RegisterClass vec_class,
|
int sub_idx, SubRegIndex sub_reg>
|
||||||
int sub_idx, SubRegIndex sub_reg> : Pat <
|
: Pat <
|
||||||
|
(vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
|
||||||
(vec_type (vector_insert (vec_type vec_class:$vec),
|
(INSERT_SUBREG $vec, $elem, sub_reg)
|
||||||
(elem_type elem_class:$elem), sub_idx)),
|
|
||||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// Vector Build pattern
|
// Vector Build pattern
|
||||||
class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
|
class Vector1_Build <ValueType vecType, ValueType elemType,
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
RegisterClass rc> : Pat <
|
||||||
(vecType (build_vector (elemType elemClass:$src))),
|
(vecType (build_vector elemType:$src)),
|
||||||
(vecType elemClass:$src)
|
(vecType (COPY_TO_REGCLASS $src, rc))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
|
class Vector2_Build <ValueType vecType, ValueType elemType> : Pat <
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
(vecType (build_vector elemType:$sub0, elemType:$sub1)),
|
||||||
(vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
|
|
||||||
(INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG
|
||||||
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
|
(vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
|
class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
(vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
|
||||||
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
|
|
||||||
(elemType elemClass:$z), (elemType elemClass:$w))),
|
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
|
(vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
|
||||||
elemClass:$z, sub2), elemClass:$w, sub3)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
|
class Vector8_Build <ValueType vecType, ValueType elemType> : Pat <
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
(vecType (build_vector elemType:$sub0, elemType:$sub1,
|
||||||
(vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
|
elemType:$sub2, elemType:$sub3,
|
||||||
(elemType elemClass:$sub2), (elemType elemClass:$sub3),
|
elemType:$sub4, elemType:$sub5,
|
||||||
(elemType elemClass:$sub4), (elemType elemClass:$sub5),
|
elemType:$sub6, elemType:$sub7)),
|
||||||
(elemType elemClass:$sub6), (elemType elemClass:$sub7))),
|
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
|
(vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
|
||||||
elemClass:$sub2, sub2), elemClass:$sub3, sub3),
|
$sub2, sub2), $sub3, sub3),
|
||||||
elemClass:$sub4, sub4), elemClass:$sub5, sub5),
|
$sub4, sub4), $sub5, sub5),
|
||||||
elemClass:$sub6, sub6), elemClass:$sub7, sub7)
|
$sub6, sub6), $sub7, sub7)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
|
class Vector16_Build <ValueType vecType, ValueType elemType> : Pat <
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
(vecType (build_vector elemType:$sub0, elemType:$sub1,
|
||||||
(vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
|
elemType:$sub2, elemType:$sub3,
|
||||||
(elemType elemClass:$sub2), (elemType elemClass:$sub3),
|
elemType:$sub4, elemType:$sub5,
|
||||||
(elemType elemClass:$sub4), (elemType elemClass:$sub5),
|
elemType:$sub6, elemType:$sub7,
|
||||||
(elemType elemClass:$sub6), (elemType elemClass:$sub7),
|
elemType:$sub8, elemType:$sub9,
|
||||||
(elemType elemClass:$sub8), (elemType elemClass:$sub9),
|
elemType:$sub10, elemType:$sub11,
|
||||||
(elemType elemClass:$sub10), (elemType elemClass:$sub11),
|
elemType:$sub12, elemType:$sub13,
|
||||||
(elemType elemClass:$sub12), (elemType elemClass:$sub13),
|
elemType:$sub14, elemType:$sub15)),
|
||||||
(elemType elemClass:$sub14), (elemType elemClass:$sub15))),
|
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||||
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
|
(vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
|
||||||
elemClass:$sub2, sub2), elemClass:$sub3, sub3),
|
$sub2, sub2), $sub3, sub3),
|
||||||
elemClass:$sub4, sub4), elemClass:$sub5, sub5),
|
$sub4, sub4), $sub5, sub5),
|
||||||
elemClass:$sub6, sub6), elemClass:$sub7, sub7),
|
$sub6, sub6), $sub7, sub7),
|
||||||
elemClass:$sub8, sub8), elemClass:$sub9, sub9),
|
$sub8, sub8), $sub9, sub9),
|
||||||
elemClass:$sub10, sub10), elemClass:$sub11, sub11),
|
$sub10, sub10), $sub11, sub11),
|
||||||
elemClass:$sub12, sub12), elemClass:$sub13, sub13),
|
$sub12, sub12), $sub13, sub13),
|
||||||
elemClass:$sub14, sub14), elemClass:$sub15, sub15)
|
$sub14, sub14), $sub15, sub15)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// XXX: Convert to new syntax and use COPY_TO_REGCLASS, once the DFAPacketizer
|
||||||
|
// can handle COPY instructions.
|
||||||
// bitconvert pattern
|
// bitconvert pattern
|
||||||
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
||||||
(dt (bitconvert (st rc:$src0))),
|
(dt (bitconvert (st rc:$src0))),
|
||||||
(dt rc:$src0)
|
(dt rc:$src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// XXX: Convert to new syntax and use COPY_TO_REGCLASS, once the DFAPacketizer
|
||||||
|
// can handle COPY instructions.
|
||||||
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
|
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
|
||||||
(vt (AMDGPUdwordaddr (vt rc:$addr))),
|
(vt (AMDGPUdwordaddr (vt rc:$addr))),
|
||||||
(vt rc:$addr)
|
(vt rc:$addr)
|
||||||
|
@ -983,58 +983,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
|
|||||||
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
|
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
|
||||||
def SETE : R600_2OP <
|
def SETE : R600_2OP <
|
||||||
0x08, "SETE",
|
0x08, "SETE",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
|
|
||||||
COND_EQ))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SGT : R600_2OP <
|
def SGT : R600_2OP <
|
||||||
0x09, "SETGT",
|
0x09, "SETGT",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
|
|
||||||
COND_GT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SGE : R600_2OP <
|
def SGE : R600_2OP <
|
||||||
0xA, "SETGE",
|
0xA, "SETGE",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
|
|
||||||
COND_GE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SNE : R600_2OP <
|
def SNE : R600_2OP <
|
||||||
0xB, "SETNE",
|
0xB, "SETNE",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
|
|
||||||
COND_NE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETE_DX10 : R600_2OP <
|
def SETE_DX10 : R600_2OP <
|
||||||
0xC, "SETE_DX10",
|
0xC, "SETE_DX10",
|
||||||
[(set R600_Reg32:$dst,
|
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
|
|
||||||
COND_EQ))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGT_DX10 : R600_2OP <
|
def SETGT_DX10 : R600_2OP <
|
||||||
0xD, "SETGT_DX10",
|
0xD, "SETGT_DX10",
|
||||||
[(set R600_Reg32:$dst,
|
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
|
|
||||||
COND_GT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGE_DX10 : R600_2OP <
|
def SETGE_DX10 : R600_2OP <
|
||||||
0xE, "SETGE_DX10",
|
0xE, "SETGE_DX10",
|
||||||
[(set R600_Reg32:$dst,
|
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
|
|
||||||
COND_GE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETNE_DX10 : R600_2OP <
|
def SETNE_DX10 : R600_2OP <
|
||||||
0xF, "SETNE_DX10",
|
0xF, "SETNE_DX10",
|
||||||
[(set R600_Reg32:$dst,
|
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
|
|
||||||
COND_NE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
|
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
|
||||||
@ -1092,38 +1076,32 @@ def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
|
|||||||
|
|
||||||
def SETE_INT : R600_2OP <
|
def SETE_INT : R600_2OP <
|
||||||
0x3A, "SETE_INT",
|
0x3A, "SETE_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGT_INT : R600_2OP <
|
def SETGT_INT : R600_2OP <
|
||||||
0x3B, "SETGT_INT",
|
0x3B, "SETGT_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGE_INT : R600_2OP <
|
def SETGE_INT : R600_2OP <
|
||||||
0x3C, "SETGE_INT",
|
0x3C, "SETGE_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETNE_INT : R600_2OP <
|
def SETNE_INT : R600_2OP <
|
||||||
0x3D, "SETNE_INT",
|
0x3D, "SETNE_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGT_UINT : R600_2OP <
|
def SETGT_UINT : R600_2OP <
|
||||||
0x3E, "SETGT_UINT",
|
0x3E, "SETGT_UINT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SETGE_UINT : R600_2OP <
|
def SETGE_UINT : R600_2OP <
|
||||||
0x3F, "SETGE_UINT",
|
0x3F, "SETGE_UINT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
|
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
|
||||||
@ -1133,26 +1111,17 @@ def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
|
|||||||
|
|
||||||
def CNDE_INT : R600_3OP <
|
def CNDE_INT : R600_3OP <
|
||||||
0x1C, "CNDE_INT",
|
0x1C, "CNDE_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
|
||||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
|
||||||
COND_EQ))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def CNDGE_INT : R600_3OP <
|
def CNDGE_INT : R600_3OP <
|
||||||
0x1E, "CNDGE_INT",
|
0x1E, "CNDGE_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
|
||||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
|
||||||
COND_GE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def CNDGT_INT : R600_3OP <
|
def CNDGT_INT : R600_3OP <
|
||||||
0x1D, "CNDGT_INT",
|
0x1D, "CNDGT_INT",
|
||||||
[(set (i32 R600_Reg32:$dst),
|
[(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
|
||||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
|
||||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
|
||||||
COND_GT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1161,7 +1130,7 @@ def CNDGT_INT : R600_3OP <
|
|||||||
|
|
||||||
def TEX_LD : R600_TEX <
|
def TEX_LD : R600_TEX <
|
||||||
0x03, "TEX_LD",
|
0x03, "TEX_LD",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
|
||||||
imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
|
imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
|
||||||
imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
> {
|
> {
|
||||||
@ -1174,19 +1143,19 @@ let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
|
|||||||
|
|
||||||
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
|
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
|
||||||
0x04, "TEX_GET_TEXTURE_RESINFO",
|
0x04, "TEX_GET_TEXTURE_RESINFO",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_GET_GRADIENTS_H : R600_TEX <
|
def TEX_GET_GRADIENTS_H : R600_TEX <
|
||||||
0x07, "TEX_GET_GRADIENTS_H",
|
0x07, "TEX_GET_GRADIENTS_H",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_GET_GRADIENTS_V : R600_TEX <
|
def TEX_GET_GRADIENTS_V : R600_TEX <
|
||||||
0x08, "TEX_GET_GRADIENTS_V",
|
0x08, "TEX_GET_GRADIENTS_V",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -1202,37 +1171,37 @@ def TEX_SET_GRADIENTS_V : R600_TEX <
|
|||||||
|
|
||||||
def TEX_SAMPLE : R600_TEX <
|
def TEX_SAMPLE : R600_TEX <
|
||||||
0x10, "TEX_SAMPLE",
|
0x10, "TEX_SAMPLE",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_SAMPLE_C : R600_TEX <
|
def TEX_SAMPLE_C : R600_TEX <
|
||||||
0x18, "TEX_SAMPLE_C",
|
0x18, "TEX_SAMPLE_C",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_SAMPLE_L : R600_TEX <
|
def TEX_SAMPLE_L : R600_TEX <
|
||||||
0x11, "TEX_SAMPLE_L",
|
0x11, "TEX_SAMPLE_L",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_SAMPLE_C_L : R600_TEX <
|
def TEX_SAMPLE_C_L : R600_TEX <
|
||||||
0x19, "TEX_SAMPLE_C_L",
|
0x19, "TEX_SAMPLE_C_L",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_SAMPLE_LB : R600_TEX <
|
def TEX_SAMPLE_LB : R600_TEX <
|
||||||
0x12, "TEX_SAMPLE_LB",
|
0x12, "TEX_SAMPLE_LB",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def TEX_SAMPLE_C_LB : R600_TEX <
|
def TEX_SAMPLE_C_LB : R600_TEX <
|
||||||
0x1A, "TEX_SAMPLE_C_LB",
|
0x1A, "TEX_SAMPLE_C_LB",
|
||||||
[(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
|
[(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -1262,32 +1231,22 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
|
|||||||
|
|
||||||
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "MULADD_IEEE",
|
inst, "MULADD_IEEE",
|
||||||
[(set (f32 R600_Reg32:$dst),
|
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
|
||||||
(fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class CNDE_Common <bits<5> inst> : R600_3OP <
|
class CNDE_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "CNDE",
|
inst, "CNDE",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
|
||||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
|
||||||
COND_EQ))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class CNDGT_Common <bits<5> inst> : R600_3OP <
|
class CNDGT_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "CNDGT",
|
inst, "CNDGT",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
|
||||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
|
||||||
COND_GT))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class CNDGE_Common <bits<5> inst> : R600_3OP <
|
class CNDGE_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "CNDGE",
|
inst, "CNDGE",
|
||||||
[(set R600_Reg32:$dst,
|
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
|
||||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
|
||||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
|
||||||
COND_GE))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
multiclass DOT4_Common <bits<11> inst> {
|
multiclass DOT4_Common <bits<11> inst> {
|
||||||
@ -1295,7 +1254,7 @@ multiclass DOT4_Common <bits<11> inst> {
|
|||||||
def _pseudo : R600_REDUCTION <inst,
|
def _pseudo : R600_REDUCTION <inst,
|
||||||
(ins R600_Reg128:$src0, R600_Reg128:$src1),
|
(ins R600_Reg128:$src0, R600_Reg128:$src1),
|
||||||
"DOT4 $dst $src0, $src1",
|
"DOT4 $dst $src0, $src1",
|
||||||
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
|
[(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def _real : R600_2OP <inst, "DOT4", []>;
|
def _real : R600_2OP <inst, "DOT4", []>;
|
||||||
@ -1308,7 +1267,7 @@ multiclass CUBE_Common <bits<11> inst> {
|
|||||||
(outs R600_Reg128:$dst),
|
(outs R600_Reg128:$dst),
|
||||||
(ins R600_Reg128:$src),
|
(ins R600_Reg128:$src),
|
||||||
"CUBE $dst $src",
|
"CUBE $dst $src",
|
||||||
[(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
|
[(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
|
||||||
VecALU
|
VecALU
|
||||||
> {
|
> {
|
||||||
let isPseudo = 1;
|
let isPseudo = 1;
|
||||||
@ -1398,7 +1357,7 @@ class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
|
|||||||
}
|
}
|
||||||
|
|
||||||
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
|
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
|
||||||
inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
|
inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
|
||||||
> {
|
> {
|
||||||
let TransOnly = 1;
|
let TransOnly = 1;
|
||||||
let Itinerary = TransALU;
|
let Itinerary = TransALU;
|
||||||
@ -1445,19 +1404,20 @@ class COS_Common <bits<11> inst> : R600_1OP <
|
|||||||
|
|
||||||
multiclass DIV_Common <InstR600 recip_ieee> {
|
multiclass DIV_Common <InstR600 recip_ieee> {
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
|
(int_AMDGPU_div f32:$src0, f32:$src1),
|
||||||
(MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
|
(MUL_IEEE $src0, (recip_ieee $src1))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(fdiv R600_Reg32:$src0, R600_Reg32:$src1),
|
(fdiv f32:$src0, f32:$src1),
|
||||||
(MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
|
(MUL_IEEE $src0, (recip_ieee $src1))
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
|
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
|
||||||
(int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
|
: Pat <
|
||||||
(exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
|
(int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
|
||||||
|
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1497,11 +1457,10 @@ let Predicates = [isR600] in {
|
|||||||
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
|
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
|
||||||
|
|
||||||
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
|
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
|
||||||
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
|
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
|
||||||
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
|
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
|
||||||
|
|
||||||
def : Pat<(fsqrt R600_Reg32:$src),
|
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
|
||||||
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
|
|
||||||
|
|
||||||
def R600_ExportSwz : ExportSwzInst {
|
def R600_ExportSwz : ExportSwzInst {
|
||||||
let Word1{20-17} = 0; // BURST_COUNT
|
let Word1{20-17} = 0; // BURST_COUNT
|
||||||
@ -1577,13 +1536,13 @@ let Predicates = [isR600] in {
|
|||||||
// Helper pattern for normalizing inputs to trigonometric instructions for R700+
|
// Helper pattern for normalizing inputs to trigonometric instructions for R700+
|
||||||
// cards.
|
// cards.
|
||||||
class COS_PAT <InstR600 trig> : Pat<
|
class COS_PAT <InstR600 trig> : Pat<
|
||||||
(fcos R600_Reg32:$src),
|
(fcos f32:$src),
|
||||||
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
|
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SIN_PAT <InstR600 trig> : Pat<
|
class SIN_PAT <InstR600 trig> : Pat<
|
||||||
(fsin R600_Reg32:$src),
|
(fsin f32:$src),
|
||||||
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
|
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1621,11 +1580,10 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
|
|||||||
def SIN_eg : SIN_Common<0x8D>;
|
def SIN_eg : SIN_Common<0x8D>;
|
||||||
def COS_eg : COS_Common<0x8E>;
|
def COS_eg : COS_Common<0x8E>;
|
||||||
|
|
||||||
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
|
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
|
||||||
def : SIN_PAT <SIN_eg>;
|
def : SIN_PAT <SIN_eg>;
|
||||||
def : COS_PAT <COS_eg>;
|
def : COS_PAT <COS_eg>;
|
||||||
def : Pat<(fsqrt R600_Reg32:$src),
|
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
|
||||||
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
|
|
||||||
} // End Predicates = [isEG]
|
} // End Predicates = [isEG]
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1649,9 +1607,8 @@ let Predicates = [isEGorCayman] in {
|
|||||||
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
|
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
|
||||||
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
|
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
|
||||||
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
|
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
|
||||||
[(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
|
[(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
|
||||||
R600_Reg32:$src1,
|
i32:$src2))],
|
||||||
R600_Reg32:$src2))],
|
|
||||||
VecALU
|
VecALU
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -1659,8 +1616,7 @@ let Predicates = [isEGorCayman] in {
|
|||||||
defm : BFIPatterns <BFI_INT_eg>;
|
defm : BFIPatterns <BFI_INT_eg>;
|
||||||
|
|
||||||
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
|
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
|
||||||
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
|
[(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
|
||||||
R600_Reg32:$src2))],
|
|
||||||
VecALU
|
VecALU
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -1705,11 +1661,9 @@ let hasSideEffects = 1 in {
|
|||||||
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
|
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
|
||||||
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
|
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
|
||||||
// We should look into handling these cases separately.
|
// We should look into handling these cases separately.
|
||||||
def : Pat<(fp_to_sint R600_Reg32:$src0),
|
def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
|
||||||
(FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
|
|
||||||
|
|
||||||
def : Pat<(fp_to_uint R600_Reg32:$src0),
|
def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
|
||||||
(FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
|
|
||||||
|
|
||||||
def EG_ExportSwz : ExportSwzInst {
|
def EG_ExportSwz : ExportSwzInst {
|
||||||
let Word1{19-16} = 0; // BURST_COUNT
|
let Word1{19-16} = 0; // BURST_COUNT
|
||||||
@ -1811,14 +1765,14 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
|
|||||||
def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
|
def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
|
||||||
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
|
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
|
||||||
0x1, "RAT_WRITE_CACHELESS_32_eg",
|
0x1, "RAT_WRITE_CACHELESS_32_eg",
|
||||||
[(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
|
[(global_store i32:$rw_gpr, i32:$index_gpr)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//128-bit store
|
//128-bit store
|
||||||
def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
|
def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
|
||||||
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
|
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
|
||||||
0xf, "RAT_WRITE_CACHELESS_128",
|
0xf, "RAT_WRITE_CACHELESS_128",
|
||||||
[(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
|
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
||||||
@ -1943,19 +1897,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
|
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
|
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
|
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
|
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
|
||||||
[(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
|
[(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1964,17 +1918,17 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
|
|||||||
|
|
||||||
// 8-bit reads
|
// 8-bit reads
|
||||||
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
|
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 32-bit reads
|
// 32-bit reads
|
||||||
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
|
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 128-bit reads
|
// 128-bit reads
|
||||||
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
|
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
|
||||||
[(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
|
[(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1983,7 +1937,7 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
|
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
|
||||||
[(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
|
[(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -2013,7 +1967,7 @@ def SIN_cm : SIN_Common<0x8D>;
|
|||||||
def COS_cm : COS_Common<0x8E>;
|
def COS_cm : COS_Common<0x8E>;
|
||||||
} // End isVector = 1
|
} // End isVector = 1
|
||||||
|
|
||||||
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
|
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
|
||||||
def : SIN_PAT <SIN_cm>;
|
def : SIN_PAT <SIN_cm>;
|
||||||
def : COS_PAT <COS_cm>;
|
def : COS_PAT <COS_cm>;
|
||||||
|
|
||||||
@ -2022,8 +1976,8 @@ defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
|
|||||||
// RECIP_UINT emulation for Cayman
|
// RECIP_UINT emulation for Cayman
|
||||||
// The multiplication scales from [0,1] to the unsigned integer range
|
// The multiplication scales from [0,1] to the unsigned integer range
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(AMDGPUurecip R600_Reg32:$src0),
|
(AMDGPUurecip i32:$src0),
|
||||||
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
|
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
|
||||||
(MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
|
(MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -2033,8 +1987,7 @@ def : Pat <
|
|||||||
let COUNT = 0;
|
let COUNT = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsqrt R600_Reg32:$src),
|
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
|
||||||
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
|
|
||||||
|
|
||||||
} // End isCayman
|
} // End isCayman
|
||||||
|
|
||||||
@ -2099,17 +2052,23 @@ def MASK_WRITE : AMDGPUShaderInst <
|
|||||||
|
|
||||||
def TXD: InstR600 <
|
def TXD: InstR600 <
|
||||||
(outs R600_Reg128:$dst),
|
(outs R600_Reg128:$dst),
|
||||||
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
|
||||||
|
i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
||||||
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
||||||
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))], NullALU> {
|
[(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
|
||||||
|
imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
|
||||||
|
NullALU > {
|
||||||
let TEXInst = 1;
|
let TEXInst = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
def TXD_SHADOW: InstR600 <
|
def TXD_SHADOW: InstR600 <
|
||||||
(outs R600_Reg128:$dst),
|
(outs R600_Reg128:$dst),
|
||||||
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
|
||||||
|
i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
||||||
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
||||||
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], NullALU
|
[(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
|
||||||
|
imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
|
||||||
|
NullALU
|
||||||
> {
|
> {
|
||||||
let TEXInst = 1;
|
let TEXInst = 1;
|
||||||
}
|
}
|
||||||
@ -2149,7 +2108,7 @@ def CONST_COPY : Instruction {
|
|||||||
|
|
||||||
def TEX_VTX_CONSTBUF :
|
def TEX_VTX_CONSTBUF :
|
||||||
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
|
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
|
||||||
[(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
|
[(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
|
||||||
VTX_WORD1_GPR, VTX_WORD0 {
|
VTX_WORD1_GPR, VTX_WORD0 {
|
||||||
|
|
||||||
let VC_INST = 0;
|
let VC_INST = 0;
|
||||||
@ -2203,7 +2162,7 @@ def TEX_VTX_CONSTBUF :
|
|||||||
|
|
||||||
def TEX_VTX_TEXBUF:
|
def TEX_VTX_TEXBUF:
|
||||||
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
|
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
|
||||||
[(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
|
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
|
||||||
VTX_WORD1_GPR, VTX_WORD0 {
|
VTX_WORD1_GPR, VTX_WORD0 {
|
||||||
|
|
||||||
let VC_INST = 0;
|
let VC_INST = 0;
|
||||||
@ -2329,9 +2288,8 @@ let isTerminator=1 in {
|
|||||||
// CND*_INT Patterns for f32 True / False values
|
// CND*_INT Patterns for f32 True / False values
|
||||||
|
|
||||||
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
|
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
|
(selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
|
||||||
R600_Reg32:$src2, cc),
|
(cnd $src0, $src1, $src2)
|
||||||
(cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : CND_INT_f32 <CNDE_INT, SETEQ>;
|
def : CND_INT_f32 <CNDE_INT, SETEQ>;
|
||||||
@ -2340,9 +2298,8 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>;
|
|||||||
|
|
||||||
//CNDGE_INT extra pattern
|
//CNDGE_INT extra pattern
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
|
(selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
|
||||||
(i32 R600_Reg32:$src2), COND_GT),
|
(CNDGE_INT $src0, $src1, $src2)
|
||||||
(CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// KIL Patterns
|
// KIL Patterns
|
||||||
@ -2352,56 +2309,56 @@ def KILP : Pat <
|
|||||||
>;
|
>;
|
||||||
|
|
||||||
def KIL : Pat <
|
def KIL : Pat <
|
||||||
(int_AMDGPU_kill R600_Reg32:$src0),
|
(int_AMDGPU_kill f32:$src0),
|
||||||
(MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
|
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SGT Reverse args
|
// SGT Reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
|
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
|
||||||
(SGT R600_Reg32:$src1, R600_Reg32:$src0)
|
(SGT $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SGE Reverse args
|
// SGE Reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
|
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
|
||||||
(SGE R600_Reg32:$src1, R600_Reg32:$src0)
|
(SGE $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGT_DX10 reverse args
|
// SETGT_DX10 reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
|
(selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
|
||||||
(SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGT_DX10 $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGE_DX10 reverse args
|
// SETGE_DX10 reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
|
(selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
|
||||||
(SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGE_DX10 $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGT_INT reverse args
|
// SETGT_INT reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
|
(selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
|
||||||
(SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGT_INT $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGE_INT reverse args
|
// SETGE_INT reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
|
(selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
|
||||||
(SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGE_INT $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGT_UINT reverse args
|
// SETGT_UINT reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
|
(selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
|
||||||
(SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGT_UINT $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// SETGE_UINT reverse args
|
// SETGE_UINT reverse args
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
|
(selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
|
||||||
(SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
|
(SETGE_UINT $src1, $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// The next two patterns are special cases for handling 'true if ordered' and
|
// The next two patterns are special cases for handling 'true if ordered' and
|
||||||
@ -2414,50 +2371,50 @@ def : Pat <
|
|||||||
|
|
||||||
//SETE - 'true if ordered'
|
//SETE - 'true if ordered'
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
|
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
|
||||||
(SETE R600_Reg32:$src0, R600_Reg32:$src1)
|
(SETE $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//SETE_DX10 - 'true if ordered'
|
//SETE_DX10 - 'true if ordered'
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
|
(selectcc f32:$src0, f32:$src1, -1, 0, SETO),
|
||||||
(SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
|
(SETE_DX10 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//SNE - 'true if unordered'
|
//SNE - 'true if unordered'
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
|
(selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
|
||||||
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
|
(SNE $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//SETNE_DX10 - 'true if unordered'
|
//SETNE_DX10 - 'true if unordered'
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
|
(selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
|
||||||
(SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
|
(SETNE_DX10 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
|
def : Extract_Element <f32, v4f32, 0, sub0>;
|
||||||
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
|
def : Extract_Element <f32, v4f32, 1, sub1>;
|
||||||
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
|
def : Extract_Element <f32, v4f32, 2, sub2>;
|
||||||
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
|
def : Extract_Element <f32, v4f32, 3, sub3>;
|
||||||
|
|
||||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
|
def : Insert_Element <f32, v4f32, 0, sub0>;
|
||||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
|
def : Insert_Element <f32, v4f32, 1, sub1>;
|
||||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
|
def : Insert_Element <f32, v4f32, 2, sub2>;
|
||||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
|
def : Insert_Element <f32, v4f32, 3, sub3>;
|
||||||
|
|
||||||
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
|
def : Extract_Element <i32, v4i32, 0, sub0>;
|
||||||
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
|
def : Extract_Element <i32, v4i32, 1, sub1>;
|
||||||
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
|
def : Extract_Element <i32, v4i32, 2, sub2>;
|
||||||
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
|
def : Extract_Element <i32, v4i32, 3, sub3>;
|
||||||
|
|
||||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
|
def : Insert_Element <i32, v4i32, 0, sub0>;
|
||||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
|
def : Insert_Element <i32, v4i32, 1, sub1>;
|
||||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
|
def : Insert_Element <i32, v4i32, 2, sub2>;
|
||||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
|
def : Insert_Element <i32, v4i32, 3, sub3>;
|
||||||
|
|
||||||
def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
|
def : Vector4_Build <v4f32, f32>;
|
||||||
def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
|
def : Vector4_Build <v4i32, i32>;
|
||||||
|
|
||||||
// bitconvert patterns
|
// bitconvert patterns
|
||||||
|
|
||||||
|
@ -108,7 +108,7 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
|
|||||||
def S_CMPK_EQ_I32 : SOPK <
|
def S_CMPK_EQ_I32 : SOPK <
|
||||||
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
|
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
|
||||||
"S_CMPK_EQ_I32",
|
"S_CMPK_EQ_I32",
|
||||||
[(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
|
[(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
|
||||||
>;
|
>;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -600,12 +600,12 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
|
|||||||
//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
|
//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
|
||||||
//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
|
//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
|
||||||
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
|
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
|
||||||
[(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
|
[(set f32:$dst, (sint_to_fp i32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
|
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
|
||||||
defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
|
defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
|
||||||
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
|
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
|
||||||
[(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
|
[(set i32:$dst, (fp_to_sint f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
|
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
|
||||||
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
|
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
|
||||||
@ -622,35 +622,35 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
|
|||||||
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
|
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
|
||||||
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
|
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
|
||||||
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
|
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
|
||||||
[(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
|
[(set f32:$dst, (AMDGPUfract f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
|
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
|
||||||
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
|
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
|
||||||
[(set VReg_32:$dst, (fceil VSrc_32:$src0))]
|
[(set f32:$dst, (fceil f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
|
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
|
||||||
[(set VReg_32:$dst, (frint VSrc_32:$src0))]
|
[(set f32:$dst, (frint f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
|
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
|
||||||
[(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
|
[(set f32:$dst, (ffloor f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
|
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
|
||||||
[(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
|
[(set f32:$dst, (fexp2 f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
|
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
|
||||||
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
|
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
|
||||||
[(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
|
[(set f32:$dst, (flog2 f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
|
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
|
||||||
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
|
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
|
||||||
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
|
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
|
||||||
[(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
|
[(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
|
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
|
||||||
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
|
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
|
||||||
defm V_RSQ_LEGACY_F32 : VOP1_32 <
|
defm V_RSQ_LEGACY_F32 : VOP1_32 <
|
||||||
0x0000002d, "V_RSQ_LEGACY_F32",
|
0x0000002d, "V_RSQ_LEGACY_F32",
|
||||||
[(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
|
[(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
|
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
|
||||||
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
|
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
|
||||||
@ -793,14 +793,13 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
|
|||||||
(ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
|
(ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
|
||||||
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
|
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
|
||||||
"V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
|
"V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
|
||||||
[(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
|
[(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
|
||||||
VSrc_32:$src1, VSrc_32:$src0))]
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
//f32 pattern for V_CNDMASK_B32_e64
|
//f32 pattern for V_CNDMASK_B32_e64
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
|
(f32 (select i1:$src2, f32:$src1, f32:$src0)),
|
||||||
(V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
|
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
|
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
|
||||||
@ -808,11 +807,11 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
|
|||||||
|
|
||||||
let isCommutable = 1 in {
|
let isCommutable = 1 in {
|
||||||
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
|
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
|
||||||
[(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (fadd f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
|
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
|
||||||
[(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (fsub f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
|
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
|
||||||
} // End isCommutable = 1
|
} // End isCommutable = 1
|
||||||
@ -823,11 +822,11 @@ let isCommutable = 1 in {
|
|||||||
|
|
||||||
defm V_MUL_LEGACY_F32 : VOP2_32 <
|
defm V_MUL_LEGACY_F32 : VOP2_32 <
|
||||||
0x00000007, "V_MUL_LEGACY_F32",
|
0x00000007, "V_MUL_LEGACY_F32",
|
||||||
[(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
|
defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
|
||||||
[(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (fmul f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // End isCommutable = 1
|
} // End isCommutable = 1
|
||||||
@ -840,11 +839,11 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
|
|||||||
let isCommutable = 1 in {
|
let isCommutable = 1 in {
|
||||||
|
|
||||||
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
|
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
|
||||||
[(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
|
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
|
||||||
[(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
|
[(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
|
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
|
||||||
@ -855,28 +854,28 @@ defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
|
|||||||
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
|
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
|
||||||
|
|
||||||
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
|
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
|
||||||
[(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
|
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
|
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
|
||||||
|
|
||||||
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
|
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
|
||||||
[(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
|
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
|
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
|
||||||
|
|
||||||
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
|
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
|
||||||
[(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
|
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
|
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
|
||||||
|
|
||||||
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
|
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
|
||||||
[(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
|
[(set i32:$dst, (and i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
|
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
|
||||||
[(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
|
[(set i32:$dst, (or i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
|
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
|
||||||
[(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
|
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // End isCommutable = 1
|
} // End isCommutable = 1
|
||||||
@ -891,11 +890,11 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
|
|||||||
|
|
||||||
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
||||||
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
|
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
|
||||||
[(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
|
[(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
|
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
|
||||||
[(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
|
[(set i32:$dst, (sub i32:$src0, i32:$src1))]
|
||||||
>;
|
>;
|
||||||
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
|
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
|
||||||
|
|
||||||
@ -911,7 +910,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
|
|||||||
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
|
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
|
||||||
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
|
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
|
||||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
|
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
|
||||||
[(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
|
[(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
|
||||||
>;
|
>;
|
||||||
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
|
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
|
||||||
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
|
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
|
||||||
@ -990,18 +989,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
|
|||||||
} // isCommutable = 1
|
} // isCommutable = 1
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(mul VSrc_32:$src0, VReg_32:$src1),
|
(mul i32:$src0, i32:$src1),
|
||||||
(V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
|
(V_MUL_LO_I32 $src0, $src1, (i32 0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(mulhu VSrc_32:$src0, VReg_32:$src1),
|
(mulhu i32:$src0, i32:$src1),
|
||||||
(V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
|
(V_MUL_HI_U32 $src0, $src1, (i32 0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(mulhs VSrc_32:$src0, VReg_32:$src1),
|
(mulhs i32:$src0, i32:$src1),
|
||||||
(V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
|
(V_MUL_HI_I32 $src0, $src1, (i32 0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
|
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
|
||||||
@ -1034,19 +1033,19 @@ def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
|
|||||||
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
|
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
|
||||||
|
|
||||||
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
|
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
|
||||||
[(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
|
[(set i64:$dst, (and i64:$src0, i64:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(i1 (and SSrc_64:$src0, SSrc_64:$src1)),
|
(i1 (and i1:$src0, i1:$src1)),
|
||||||
(S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
|
(S_AND_B64 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
|
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
|
||||||
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
|
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(i1 (or SSrc_64:$src0, SSrc_64:$src1)),
|
(i1 (or i1:$src0, i1:$src1)),
|
||||||
(S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
|
(S_OR_B64 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
|
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
|
||||||
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
|
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
|
||||||
@ -1097,14 +1096,14 @@ def SI_IF : InstSI <
|
|||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins SReg_64:$vcc, brtarget:$target),
|
(ins SReg_64:$vcc, brtarget:$target),
|
||||||
"SI_IF $dst, $vcc, $target",
|
"SI_IF $dst, $vcc, $target",
|
||||||
[(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
|
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SI_ELSE : InstSI <
|
def SI_ELSE : InstSI <
|
||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins SReg_64:$src, brtarget:$target),
|
(ins SReg_64:$src, brtarget:$target),
|
||||||
"SI_ELSE $dst, $src, $target",
|
"SI_ELSE $dst, $src, $target",
|
||||||
[(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
|
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
|
||||||
|
|
||||||
let Constraints = "$src = $dst";
|
let Constraints = "$src = $dst";
|
||||||
}
|
}
|
||||||
@ -1113,7 +1112,7 @@ def SI_LOOP : InstSI <
|
|||||||
(outs),
|
(outs),
|
||||||
(ins SReg_64:$saved, brtarget:$target),
|
(ins SReg_64:$saved, brtarget:$target),
|
||||||
"SI_LOOP $saved, $target",
|
"SI_LOOP $saved, $target",
|
||||||
[(int_SI_loop SReg_64:$saved, bb:$target)]
|
[(int_SI_loop i64:$saved, bb:$target)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // end isBranch = 1, isTerminator = 1
|
} // end isBranch = 1, isTerminator = 1
|
||||||
@ -1122,35 +1121,35 @@ def SI_BREAK : InstSI <
|
|||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins SReg_64:$src),
|
(ins SReg_64:$src),
|
||||||
"SI_ELSE $dst, $src",
|
"SI_ELSE $dst, $src",
|
||||||
[(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
|
[(set i64:$dst, (int_SI_break i64:$src))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SI_IF_BREAK : InstSI <
|
def SI_IF_BREAK : InstSI <
|
||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins SReg_64:$vcc, SReg_64:$src),
|
(ins SReg_64:$vcc, SReg_64:$src),
|
||||||
"SI_IF_BREAK $dst, $vcc, $src",
|
"SI_IF_BREAK $dst, $vcc, $src",
|
||||||
[(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
|
[(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SI_ELSE_BREAK : InstSI <
|
def SI_ELSE_BREAK : InstSI <
|
||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins SReg_64:$src0, SReg_64:$src1),
|
(ins SReg_64:$src0, SReg_64:$src1),
|
||||||
"SI_ELSE_BREAK $dst, $src0, $src1",
|
"SI_ELSE_BREAK $dst, $src0, $src1",
|
||||||
[(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
|
[(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SI_END_CF : InstSI <
|
def SI_END_CF : InstSI <
|
||||||
(outs),
|
(outs),
|
||||||
(ins SReg_64:$saved),
|
(ins SReg_64:$saved),
|
||||||
"SI_END_CF $saved",
|
"SI_END_CF $saved",
|
||||||
[(int_SI_end_cf SReg_64:$saved)]
|
[(int_SI_end_cf i64:$saved)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SI_KILL : InstSI <
|
def SI_KILL : InstSI <
|
||||||
(outs),
|
(outs),
|
||||||
(ins VReg_32:$src),
|
(ins VReg_32:$src),
|
||||||
"SI_KIL $src",
|
"SI_KIL $src",
|
||||||
[(int_AMDGPU_kill VReg_32:$src)]
|
[(int_AMDGPU_kill f32:$src)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||||
@ -1184,8 +1183,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
|
|||||||
} // end IsCodeGenOnly, isPseudo
|
} // end IsCodeGenOnly, isPseudo
|
||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
|
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
|
||||||
(V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
|
(V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
@ -1195,92 +1194,80 @@ def : Pat <
|
|||||||
|
|
||||||
/* int_SI_vs_load_input */
|
/* int_SI_vs_load_input */
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
|
(int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset,
|
||||||
VReg_32:$buf_idx_vgpr),
|
i32:$buf_idx_vgpr),
|
||||||
(BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
|
(BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
|
||||||
VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
|
$buf_idx_vgpr, $tlst, 0, 0, 0)
|
||||||
0, 0, 0)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/* int_SI_export */
|
/* int_SI_export */
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
||||||
VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
|
f32:$src0, f32:$src1, f32:$src2, f32:$src3),
|
||||||
(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
|
(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
|
||||||
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
|
$src0, $src1, $src2, $src3)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
/********** ======================= **********/
|
||||||
|
/********** Image sampling patterns **********/
|
||||||
|
/********** ======================= **********/
|
||||||
|
|
||||||
/* int_SI_sample for simple 1D texture lookup */
|
/* int_SI_sample for simple 1D texture lookup */
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_sample VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm),
|
(int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
|
||||||
(IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
|
(IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
|
class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
|
||||||
ValueType addr_type> : Pat <
|
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
|
||||||
(name (addr_type addr_class:$addr),
|
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
SReg_256:$rsrc, SReg_128:$sampler, imm),
|
|
||||||
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
|
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
|
class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
|
||||||
ValueType addr_type> : Pat <
|
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
|
||||||
(name (addr_type addr_class:$addr),
|
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
|
|
||||||
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
|
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
|
class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
|
||||||
ValueType addr_type> : Pat <
|
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
|
||||||
(name (addr_type addr_class:$addr),
|
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
|
|
||||||
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
|
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SampleShadowPattern<Intrinsic name, MIMG opcode,
|
class SampleShadowPattern<Intrinsic name, MIMG opcode,
|
||||||
RegisterClass addr_class, ValueType addr_type> : Pat <
|
ValueType vt> : Pat <
|
||||||
(name (addr_type addr_class:$addr),
|
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
|
||||||
SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
|
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
|
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
|
class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
|
||||||
RegisterClass addr_class, ValueType addr_type> : Pat <
|
ValueType vt> : Pat <
|
||||||
(name (addr_type addr_class:$addr),
|
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
|
||||||
SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
|
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
|
|
||||||
SReg_256:$rsrc, SReg_128:$sampler)
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/* int_SI_sample* for texture lookups consuming more address parameters */
|
/* int_SI_sample* for texture lookups consuming more address parameters */
|
||||||
multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
|
multiclass SamplePatterns<ValueType addr_type> {
|
||||||
def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
|
def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
|
||||||
def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
|
def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
|
||||||
def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
|
def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
|
||||||
def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
|
def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
|
||||||
def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
|
def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
|
||||||
|
|
||||||
def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
|
def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
|
||||||
def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
|
def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
|
||||||
def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
|
def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
|
||||||
def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
|
def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
|
||||||
|
|
||||||
def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
|
def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
|
||||||
def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
|
def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
|
||||||
def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
|
def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
|
||||||
def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
|
def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : SamplePatterns<VReg_64, v2i32>;
|
defm : SamplePatterns<v2i32>;
|
||||||
defm : SamplePatterns<VReg_128, v4i32>;
|
defm : SamplePatterns<v4i32>;
|
||||||
defm : SamplePatterns<VReg_256, v8i32>;
|
defm : SamplePatterns<v8i32>;
|
||||||
defm : SamplePatterns<VReg_512, v16i32>;
|
defm : SamplePatterns<v16i32>;
|
||||||
|
|
||||||
/********** ============================================ **********/
|
/********** ============================================ **********/
|
||||||
/********** Extraction, Insertion, Building and Casting **********/
|
/********** Extraction, Insertion, Building and Casting **********/
|
||||||
@ -1288,77 +1275,77 @@ defm : SamplePatterns<VReg_512, v16i32>;
|
|||||||
|
|
||||||
foreach Index = 0-2 in {
|
foreach Index = 0-2 in {
|
||||||
def Extract_Element_v2i32_#Index : Extract_Element <
|
def Extract_Element_v2i32_#Index : Extract_Element <
|
||||||
i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v2i32_#Index : Insert_Element <
|
def Insert_Element_v2i32_#Index : Insert_Element <
|
||||||
i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def Extract_Element_v2f32_#Index : Extract_Element <
|
def Extract_Element_v2f32_#Index : Extract_Element <
|
||||||
f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v2f32_#Index : Insert_Element <
|
def Insert_Element_v2f32_#Index : Insert_Element <
|
||||||
f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach Index = 0-3 in {
|
foreach Index = 0-3 in {
|
||||||
def Extract_Element_v4i32_#Index : Extract_Element <
|
def Extract_Element_v4i32_#Index : Extract_Element <
|
||||||
i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v4i32_#Index : Insert_Element <
|
def Insert_Element_v4i32_#Index : Insert_Element <
|
||||||
i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def Extract_Element_v4f32_#Index : Extract_Element <
|
def Extract_Element_v4f32_#Index : Extract_Element <
|
||||||
f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v4f32_#Index : Insert_Element <
|
def Insert_Element_v4f32_#Index : Insert_Element <
|
||||||
f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach Index = 0-7 in {
|
foreach Index = 0-7 in {
|
||||||
def Extract_Element_v8i32_#Index : Extract_Element <
|
def Extract_Element_v8i32_#Index : Extract_Element <
|
||||||
i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v8i32_#Index : Insert_Element <
|
def Insert_Element_v8i32_#Index : Insert_Element <
|
||||||
i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def Extract_Element_v8f32_#Index : Extract_Element <
|
def Extract_Element_v8f32_#Index : Extract_Element <
|
||||||
f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v8f32_#Index : Insert_Element <
|
def Insert_Element_v8f32_#Index : Insert_Element <
|
||||||
f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach Index = 0-15 in {
|
foreach Index = 0-15 in {
|
||||||
def Extract_Element_v16i32_#Index : Extract_Element <
|
def Extract_Element_v16i32_#Index : Extract_Element <
|
||||||
i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v16i32_#Index : Insert_Element <
|
def Insert_Element_v16i32_#Index : Insert_Element <
|
||||||
i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
|
i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def Extract_Element_v16f32_#Index : Extract_Element <
|
def Extract_Element_v16f32_#Index : Extract_Element <
|
||||||
f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
def Insert_Element_v16f32_#Index : Insert_Element <
|
def Insert_Element_v16f32_#Index : Insert_Element <
|
||||||
f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
|
f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
|
def : Vector1_Build <v1i32, i32, VReg_32>;
|
||||||
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
|
def : Vector2_Build <v2i32, i32>;
|
||||||
def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
|
def : Vector2_Build <v2f32, f32>;
|
||||||
def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
|
def : Vector4_Build <v4i32, i32>;
|
||||||
def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
|
def : Vector4_Build <v4f32, f32>;
|
||||||
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
|
def : Vector8_Build <v8i32, i32>;
|
||||||
def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
|
def : Vector8_Build <v8f32, f32>;
|
||||||
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
|
def : Vector16_Build <v16i32, i32>;
|
||||||
def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
|
def : Vector16_Build <v16f32, f32>;
|
||||||
|
|
||||||
def : BitConvert <i32, f32, SReg_32>;
|
def : BitConvert <i32, f32, SReg_32>;
|
||||||
def : BitConvert <i32, f32, VReg_32>;
|
def : BitConvert <i32, f32, VReg_32>;
|
||||||
@ -1371,20 +1358,20 @@ def : BitConvert <f32, i32, VReg_32>;
|
|||||||
/********** =================== **********/
|
/********** =================== **********/
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
|
(int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
|
||||||
(V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
|
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
|
||||||
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(fabs VReg_32:$src),
|
(fabs f32:$src),
|
||||||
(V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
|
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
|
||||||
1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(fneg VReg_32:$src),
|
(fneg f32:$src),
|
||||||
(V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
|
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
|
||||||
0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
|
0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
@ -1425,16 +1412,16 @@ def : Pat <
|
|||||||
/********** ===================== **********/
|
/********** ===================== **********/
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
|
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
|
||||||
(V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
|
(V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
|
(int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
|
||||||
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
|
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
|
||||||
imm:$attr_chan, imm:$attr, M0Reg:$params),
|
imm:$attr_chan, imm:$attr, i32:$params),
|
||||||
(EXTRACT_SUBREG VReg_64:$ij, sub1),
|
(EXTRACT_SUBREG $ij, sub1),
|
||||||
imm:$attr_chan, imm:$attr, M0Reg:$params)
|
imm:$attr_chan, imm:$attr, $params)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
@ -1442,70 +1429,70 @@ def : Pat <
|
|||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
|
|
||||||
/* llvm.AMDGPU.pow */
|
/* llvm.AMDGPU.pow */
|
||||||
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
|
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
|
(int_AMDGPU_div f32:$src0, f32:$src1),
|
||||||
(V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
|
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(fdiv VSrc_32:$src0, VSrc_32:$src1),
|
(fdiv f32:$src0, f32:$src1),
|
||||||
(V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
|
(V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(fcos VSrc_32:$src0),
|
(fcos f32:$src0),
|
||||||
(V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
(V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(fsin VSrc_32:$src0),
|
(fsin f32:$src0),
|
||||||
(V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
(V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_AMDGPU_cube VReg_128:$src),
|
(int_AMDGPU_cube v4f32:$src),
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
||||||
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
(V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
(EXTRACT_SUBREG $src, sub1),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub2)),
|
(EXTRACT_SUBREG $src, sub2)),
|
||||||
sub0),
|
sub0),
|
||||||
(V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
(V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
(EXTRACT_SUBREG $src, sub1),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub2)),
|
(EXTRACT_SUBREG $src, sub2)),
|
||||||
sub1),
|
sub1),
|
||||||
(V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
(V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
(EXTRACT_SUBREG $src, sub1),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub2)),
|
(EXTRACT_SUBREG $src, sub2)),
|
||||||
sub2),
|
sub2),
|
||||||
(V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
(V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
(EXTRACT_SUBREG $src, sub1),
|
||||||
(EXTRACT_SUBREG VReg_128:$src, sub2)),
|
(EXTRACT_SUBREG $src, sub2)),
|
||||||
sub3)
|
sub3)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(i32 (sext (i1 SReg_64:$src0))),
|
(i32 (sext i1:$src0)),
|
||||||
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
|
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 1. Offset as 8bit DWORD immediate
|
// 1. Offset as 8bit DWORD immediate
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
|
(int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
|
||||||
(S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
|
(S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 2. Offset loaded in an 32bit SGPR
|
// 2. Offset loaded in an 32bit SGPR
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_load_const SReg_128:$sbase, imm:$offset),
|
(int_SI_load_const v16i8:$sbase, imm:$offset),
|
||||||
(S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
|
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 3. Offset in an 32Bit VGPR
|
// 3. Offset in an 32Bit VGPR
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
|
(int_SI_load_const v16i8:$sbase, i32:$voff),
|
||||||
(BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
|
(BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// The multiplication scales from [0,1] to the unsigned integer range
|
// The multiplication scales from [0,1] to the unsigned integer range
|
||||||
@ -1520,30 +1507,33 @@ def : Pat <
|
|||||||
/********** VOP3 Patterns **********/
|
/********** VOP3 Patterns **********/
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
|
|
||||||
def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
|
def : Pat <
|
||||||
(V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2)>;
|
(f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
|
||||||
|
(V_MAD_F32 $src0, $src1, $src2)
|
||||||
|
>;
|
||||||
|
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
/********** SMRD Patterns **********/
|
/********** SMRD Patterns **********/
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
|
|
||||||
multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
|
multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
|
||||||
|
|
||||||
// 1. Offset as 8bit DWORD immediate
|
// 1. Offset as 8bit DWORD immediate
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
|
(constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
|
||||||
(vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
|
(vt (Instr_IMM $sbase, IMM8bitDWORD:$offset))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 2. Offset loaded in an 32bit SGPR
|
// 2. Offset loaded in an 32bit SGPR
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
|
(constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
|
||||||
(vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
|
(vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 3. No offset at all
|
// 3. No offset at all
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(constant_load SReg_64:$sbase),
|
(constant_load i64:$sbase),
|
||||||
(vt (Instr_IMM SReg_64:$sbase, 0))
|
(vt (Instr_IMM $sbase, 0))
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1556,45 +1546,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
|
|||||||
/********** Indirect adressing **********/
|
/********** Indirect adressing **********/
|
||||||
/********** ====================== **********/
|
/********** ====================== **********/
|
||||||
|
|
||||||
multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
|
multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
|
||||||
SI_INDIRECT_DST IndDst> {
|
|
||||||
// 1. Extract with offset
|
// 1. Extract with offset
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(vector_extract (vt rc:$vec),
|
(vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
|
||||||
(i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
|
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
|
||||||
),
|
|
||||||
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 2. Extract without offset
|
// 2. Extract without offset
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(vector_extract (vt rc:$vec),
|
(vector_extract vt:$vec, (i64 (zext i32:$idx))),
|
||||||
(i64 (zext (i32 VReg_32:$idx)))
|
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
|
||||||
),
|
|
||||||
(f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 3. Insert with offset
|
// 3. Insert with offset
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(vector_insert (vt rc:$vec), (f32 VReg_32:$val),
|
(vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
|
||||||
(i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
|
(IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
|
||||||
),
|
|
||||||
(vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
|
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 4. Insert without offset
|
// 4. Insert without offset
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(vector_insert (vt rc:$vec), (f32 VReg_32:$val),
|
(vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
|
||||||
(i64 (zext (i32 VReg_32:$idx)))
|
(IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
|
||||||
),
|
|
||||||
(vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
|
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
|
defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
|
||||||
defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
|
defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
|
||||||
defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
|
defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
|
||||||
defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
|
defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
|
||||||
|
|
||||||
/********** =============== **********/
|
/********** =============== **********/
|
||||||
/********** Conditions **********/
|
/********** Conditions **********/
|
||||||
@ -1602,12 +1584,12 @@ defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
|
|||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(i1 (setcc f32:$src0, f32:$src1, SETO)),
|
(i1 (setcc f32:$src0, f32:$src1, SETO)),
|
||||||
(V_CMP_O_F32_e64 f32:$src0, f32:$src1)
|
(V_CMP_O_F32_e64 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(i1 (setcc f32:$src0, f32:$src1, SETUO)),
|
(i1 (setcc f32:$src0, f32:$src1, SETUO)),
|
||||||
(V_CMP_U_F32_e64 f32:$src0, f32:$src1)
|
(V_CMP_U_F32_e64 $src0, $src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // End isSI predicate
|
} // End isSI predicate
|
||||||
|
Loading…
Reference in New Issue
Block a user