mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-24 22:32:47 +00:00
dd6fbd1136
inconsistent intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97959 91177308-0d34-0410-b5e6-96231b3b80d8
98 lines
4.4 KiB
TableGen
98 lines
4.4 KiB
TableGen
//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
|
|
//
|
|
// Cell SPU math operations
|
|
//
|
|
// This target description file contains instruction sequences for various
|
|
// math operations, such as vector multiplies, i32 multiply, etc., for the
|
|
// SPU's i32, i16, i8 and corresponding vector types.
|
|
//
|
|
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
|
|
// purely and completely coincidental.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v16i8 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern for 'mul' on two v16i8 operands. The result DAG is an
// ORv4i32 of two pieces:
//   1. a SELBv4i32 of MPYv8i16($rA, $rB) and a SHLHI-by-8 product of the
//      ROTMAHI-by-8 operands, passed through ANDv4i32 with
//      (ILAv4i32 0x0000ffff);
//   2. a SELBv4i32 of the products of the ROTMAI-by-16 and ROTMAI-by-8
//      operands, passed through SHLIv4i32 by 16.
// Both SELBs take (FSMBIv8i16 0x2222) as their third operand.
// NOTE(review): presumably the two pieces supply the low and high halfword
// of each word -- confirm the byte-lane behaviour against the SPU ISA.
def : Pat<
  (mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
  (ORv4i32
    (ANDv4i32
      (SELBv4i32
        (MPYv8i16 VECREG:$rA, VECREG:$rB),
        (SHLHIv8i16
          (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                    (ROTMAHIv8i16 VECREG:$rB, 8)),
          8),
        (FSMBIv8i16 0x2222)),
      (ILAv4i32 0x0000ffff)),
    (SHLIv4i32
      (SELBv4i32
        (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
        (SHLHIv8i16
          (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                    (ROTMAIv4i32_i32 VECREG:$rB, 8)),
          8),
        (FSMBIv8i16 0x2222)),
      16))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v8i16 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern for 'mul' on two v8i16 operands. SELBv8i16 picks between
// MPYv8i16($rA, $rB) and MPYHHv8i16($rA, $rB) shifted left by 16 through
// SHLIv4i32, with (FSMBIv8i16 0xcccc) as its third operand.
// NOTE(review): presumably MPYHH covers the upper halfword of each word and
// 0xcccc selects those lanes -- confirm against the SPU ISA.
def : Pat<
  (mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
  (SELBv8i16
    (MPYv8i16 VECREG:$rA, VECREG:$rB),
    (SHLIv4i32
      (MPYHHv8i16 VECREG:$rA, VECREG:$rB),
      16),
    (FSMBIv8i16 0xcccc))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v4i32, i32 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern for 'mul' on two v4i32 operands. Three partial results
// are combined with two nested Av4i32 nodes:
//   MPYHv4i32($rA, $rB), MPYHv4i32($rB, $rA) and MPYUv4i32($rA, $rB).
// Every intermediate node is explicitly typed as v4i32.
def MPYv4i32 : Pat<
  (mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
  (Av4i32
    (v4i32
      (Av4i32
        (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
        (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
    (v4i32
      (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
|
|
|
|
// Selection pattern for 'mul' on two R32C operands. Three partial results
// are combined with two nested Ar32 nodes:
//   MPYHr32($rA, $rB), MPYHr32($rB, $rA) and MPYUr32($rA, $rB).
def MPYi32 : Pat<
  (mul R32C:$rA, R32C:$rB),
  (Ar32
    (Ar32
      (MPYHr32 R32C:$rA, R32C:$rB),
      (MPYHr32 R32C:$rB, R32C:$rA)),
    (MPYUr32 R32C:$rA, R32C:$rB))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// f32, v4f32 divide instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Reciprocal estimate of $rB (FRESTf32) followed by interpolation against
// $rB (FIf32).
def Interpf32 :
  CodeFrag<(FIf32 R32FP:$rB,
                  (FRESTf32 R32FP:$rB))>;
|
|
// Division estimate: FMf32 of $rA with the interpolated reciprocal
// fragment (Interpf32).
def DivEstf32 :
  CodeFrag<(FMf32 R32FP:$rA,
                  Interpf32.Fragment)>;
|
|
// Newton-Raphson iteration on the division estimate. FNMSf32 takes
// (DivEstf32, $rB, $rA); FMAf32 then combines that result with the
// interpolated reciprocal (Interpf32) and the estimate (DivEstf32).
// NOTE(review): presumably this is estimate + (rA - estimate*rB) * recip --
// confirm against the FNMS/FMA operand order.
def NRaphf32 :
  CodeFrag<(FMAf32
             (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
             Interpf32.Fragment,
             DivEstf32.Fragment)>;
|
|
// Epsilon addition: the Newton-Raphson result (NRaphf32) passed through
// AIf32 with an immediate of 1.
def Epsilonf32 :
  CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
|
|
|
|
// Selection pattern for scalar 'fdiv' on R32FP operands. SELBf32_cond picks
// between the Newton-Raphson result (NRaphf32) and the epsilon-adjusted
// result (Epsilonf32). The condition operand is CGTIf32 against -1 of
// FNMSf32($rB, Epsilonf32, $rA).
// NOTE(review): presumably the FNMS term is the residual left by the
// epsilon-adjusted quotient -- confirm against the SPU ISA.
def : Pat<
  (fdiv R32FP:$rA, R32FP:$rB),
  (SELBf32_cond
    NRaphf32.Fragment,
    Epsilonf32.Fragment,
    (CGTIf32
      (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA),
      -1))>;
|
|
|
|
// Vector form: reciprocal estimate of $rB (FRESTv4f32) followed by
// interpolation against $rB (FIv4f32).
def Interpv4f32 :
  CodeFrag<(FIv4f32 (v4f32 VECREG:$rB),
                    (FRESTv4f32 (v4f32 VECREG:$rB)))>;
|
|
// Vector division estimate: FMv4f32 of $rA with the interpolated reciprocal
// fragment (Interpv4f32).
def DivEstv4f32 :
  CodeFrag<(FMv4f32 (v4f32 VECREG:$rA),
                    Interpv4f32.Fragment)>;
|
|
// Vector Newton-Raphson iteration on the division estimate. FNMSv4f32 takes
// (DivEstv4f32, $rB, $rA); FMAv4f32 then combines that result with the
// interpolated reciprocal (Interpv4f32) and the estimate (DivEstv4f32).
// NOTE(review): presumably this is estimate + (rA - estimate*rB) * recip --
// confirm against the FNMS/FMA operand order.
def NRaphv4f32 :
  CodeFrag<(FMAv4f32
             (FNMSv4f32 DivEstv4f32.Fragment,
                        (v4f32 VECREG:$rB),
                        (v4f32 VECREG:$rA)),
             Interpv4f32.Fragment,
             DivEstv4f32.Fragment)>;
|
|
// Vector epsilon addition: the Newton-Raphson result (NRaphv4f32) passed
// through AIv4f32 with an immediate of 1.
def Epsilonv4f32 :
  CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
|
|
|
|
// Selection pattern for 'fdiv' on v4f32 operands. SELBv4f32_cond picks
// between the Newton-Raphson result (NRaphv4f32) and the epsilon-adjusted
// result (Epsilonv4f32). The condition operand is CGTIv4f32 against -1 of
// FNMSv4f32($rB, Epsilonv4f32, $rA).
// NOTE(review): presumably the FNMS term is the residual left by the
// epsilon-adjusted quotient -- confirm against the SPU ISA.
def : Pat<
  (fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
  (SELBv4f32_cond
    NRaphv4f32.Fragment,
    Epsilonv4f32.Fragment,
    (CGTIv4f32
      (FNMSv4f32 (v4f32 VECREG:$rB),
                 Epsilonv4f32.Fragment,
                 (v4f32 VECREG:$rA)),
      -1))>;
|