llvm-6502/include/llvm/IR/IntrinsicsPowerPC.td

927 lines
46 KiB
TableGen
Raw Normal View History

//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the PowerPC-specific intrinsics.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Definitions for all PowerPC intrinsics.
//
// Non-altivec intrinsics.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
// sync instruction (i.e. sync 0, a.k.a hwsync)
def int_ppc_sync : Intrinsic<[], [], []>;
// lwsync is sync 1
def int_ppc_lwsync : Intrinsic<[], [], []>;
Implement PPC counter loops as a late IR-level pass The old PPCCTRLoops pass, like the Hexagon pass version from which it was derived, could only handle some simple loops in canonical form. We cannot directly adapt the new Hexagon hardware loops pass, however, because the Hexagon pass contains a fundamental assumption that non-constant-trip-count loops will contain a guard, and this is not always true (the result being that incorrect negative counts can be generated). With this commit, we replace the pass with a late IR-level pass which makes use of SE to calculate the backedge-taken counts and safely generate the loop-count expressions (including any necessary max() parts). This IR level pass inserts custom intrinsics that are lowered into the desired decrement-and-branch instructions. The most fragile part of this new implementation is that interfering uses of the counter register must be detected on the IR level (and, on PPC, this also includes any indirect branches in addition to function calls). Also, to make all of this work, we need a variant of the mtctr instruction that is marked as having side effects. Without this, machine-code level CSE, DCE, etc. illegally transform the resulting code. Hopefully, this can be improved in the future. This new pass is smaller than the original (and much smaller than the new Hexagon hardware loops pass), and can handle many additional cases correctly. In addition, the preheader-creation code has been copied from LoopSimplify, and after we decide on where it belongs, this code will be refactored so that it can be explicitly shared (making this implementation even smaller). The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for the new Hexagon pass. There are a few classes of loops that this pass does not transform (noted by FIXMEs in the files), but these deficiencies can be addressed within the SE infrastructure (thus helping many other passes as well). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8
2013-05-15 21:37:41 +00:00
// Intrinsics used to generate ctr-based loops. These should only be
// generated by the PowerPC backend!
def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_divweu : GCCBuiltin<"__builtin_divweu">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_divde : GCCBuiltin<"__builtin_divde">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_ppc_divdeu : GCCBuiltin<"__builtin_divdeu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
// Bit permute doubleword
def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
}
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
/// PowerPC_Vec_Intrinsic - Base class for all altivec intrinsics.
class PowerPC_Vec_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
: GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
/// PowerPC_VSX_Intrinsic - Base class for all VSX intrinsics.
class PowerPC_VSX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
: GCCBuiltin<!strconcat("__builtin_vsx_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
}
//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Class Definitions.
//
/// PowerPC_Vec_FF_Intrinsic - A PowerPC intrinsic that takes one v4f32
/// vector and returns one. These intrinsics have no side effects.
class PowerPC_Vec_FF_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
/// PowerPC_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
/// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
/// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
/// PowerPC_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_DDD_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
/// PowerPC_Vec_QQQ_Intrinsic - A PowerPC intrinsic that takes two v1i128
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Class Definitions.
//
/// PowerPC_VSX_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_VSX_Vec_DDD_Intrinsic<string GCCIntSuffix>
: PowerPC_VSX_Intrinsic<GCCIntSuffix,
[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
/// PowerPC_VSX_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_VSX_Vec_FFF_Intrinsic<string GCCIntSuffix>
: PowerPC_VSX_Intrinsic<GCCIntSuffix,
[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
/// PowerPC_VSX_Sca_DDD_Intrinsic - A PowerPC intrinsic that takes two f64
/// scalars and returns one. These intrinsics have no side effects.
class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
: PowerPC_VSX_Intrinsic<GCCIntSuffix,
[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Data Stream Control.
def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
Intrinsic<[], [llvm_i32_ty], []>;
def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
Intrinsic<[], [], []>;
def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
// VSCR access.
def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
Intrinsic<[], [llvm_v4i32_ty], []>;
// Loads. These don't map directly to GCC builtins because they represent the
// source address with a single pointer.
def int_ppc_altivec_lvx :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_ppc_altivec_lvxl :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_ppc_altivec_lvebx :
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_ppc_altivec_lvehx :
Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_ppc_altivec_lvewx :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
// Stores. These don't map directly to GCC builtins because they represent the
// source address with a single pointer.
def int_ppc_altivec_stvx :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
def int_ppc_altivec_stvxl :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
def int_ppc_altivec_stvebx :
Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
def int_ppc_altivec_stvehx :
Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
def int_ppc_altivec_stvewx :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
// Comparisons setting a vector.
def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsh : GCCBuiltin<"__builtin_altivec_vcmpgtsh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
// Predicate Comparisons. The first operand specifies interpretation of CR6.
def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
}
// Vector average.
def int_ppc_altivec_vavgsb : PowerPC_Vec_BBB_Intrinsic<"vavgsb">;
def int_ppc_altivec_vavgsh : PowerPC_Vec_HHH_Intrinsic<"vavgsh">;
def int_ppc_altivec_vavgsw : PowerPC_Vec_WWW_Intrinsic<"vavgsw">;
def int_ppc_altivec_vavgub : PowerPC_Vec_BBB_Intrinsic<"vavgub">;
def int_ppc_altivec_vavguh : PowerPC_Vec_HHH_Intrinsic<"vavguh">;
def int_ppc_altivec_vavguw : PowerPC_Vec_WWW_Intrinsic<"vavguw">;
// Vector maximum.
def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">;
def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">;
def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">;
def int_ppc_altivec_vmaxsd : PowerPC_Vec_DDD_Intrinsic<"vmaxsd">;
def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">;
def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">;
def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">;
def int_ppc_altivec_vmaxud : PowerPC_Vec_DDD_Intrinsic<"vmaxud">;
// Vector minimum.
def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
def int_ppc_altivec_vminsd : PowerPC_Vec_DDD_Intrinsic<"vminsd">;
def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
def int_ppc_altivec_vminud : PowerPC_Vec_DDD_Intrinsic<"vminud">;
// Saturating adds.
def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
def int_ppc_altivec_vaddsbs : PowerPC_Vec_BBB_Intrinsic<"vaddsbs">;
def int_ppc_altivec_vadduhs : PowerPC_Vec_HHH_Intrinsic<"vadduhs">;
def int_ppc_altivec_vaddshs : PowerPC_Vec_HHH_Intrinsic<"vaddshs">;
def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">;
def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">;
def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">;
def int_ppc_altivec_vaddcuq : PowerPC_Vec_QQQ_Intrinsic<"vaddcuq">;
// Saturating subs.
def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">;
def int_ppc_altivec_vsubsbs : PowerPC_Vec_BBB_Intrinsic<"vsubsbs">;
def int_ppc_altivec_vsubuhs : PowerPC_Vec_HHH_Intrinsic<"vsubuhs">;
def int_ppc_altivec_vsubshs : PowerPC_Vec_HHH_Intrinsic<"vsubshs">;
def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">;
def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">;
def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">;
def int_ppc_altivec_vsubcuq : PowerPC_Vec_QQQ_Intrinsic<"vsubcuq">;
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Saturating multiply-adds.
def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
// Vector Multiply Sum Intructions.
def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
// Vector Multiply Intructions.
def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// Vector Sum Intructions.
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// Other multiplies.
def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
// Packs.
def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
// vpkuhum is lowered to a shuffle.
def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
// vpkuwum is lowered to a shuffle.
def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// vpkudum is lowered to a shuffle.
def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
// Unpacks.
def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vupkhsw : GCCBuiltin<"__builtin_altivec_vupkhsw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_ppc_altivec_vupklsw : GCCBuiltin<"__builtin_altivec_vupklsw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
// FP <-> integer conversion.
def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
// Add Extended Quadword
def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
// Sub Extended Quadword
def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
}
def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">;
def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">;
def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">;
def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">;
def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
// Right Shifts.
def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">;
def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">;
def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
// Rotates.
def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">;
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Miscellaneous.
def int_ppc_altivec_lvsl :
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_ppc_altivec_lvsr :
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
}
def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
// Power8 Intrinsics
// Crypto
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_crypto_vsbox :
GCCBuiltin<"__builtin_altivec_crypto_vsbox">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vpermxor :
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmad :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmaw :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
def int_ppc_altivec_crypto_vcipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">;
def int_ppc_altivec_crypto_vcipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipherlast">;
def int_ppc_altivec_crypto_vncipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipher">;
def int_ppc_altivec_crypto_vncipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipherlast">;
def int_ppc_altivec_crypto_vpmsumb :
PowerPC_Vec_BBB_Intrinsic<"crypto_vpmsumb">;
def int_ppc_altivec_crypto_vpmsumh :
PowerPC_Vec_HHH_Intrinsic<"crypto_vpmsumh">;
def int_ppc_altivec_crypto_vpmsumw :
PowerPC_Vec_WWW_Intrinsic<"crypto_vpmsumw">;
def int_ppc_altivec_crypto_vpmsumd :
PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
[PowerPC] Add vec_vsx_ld and vec_vsx_st intrinsics This patch enables the vec_vsx_ld and vec_vsx_st intrinsics for PowerPC, which provide programmer access to the lxvd2x, lxvw4x, stxvd2x, and stxvw4x instructions. New LLVM intrinsics are provided to represent these four instructions in IntrinsicsPowerPC.td. These are patterned after the similar intrinsics for lvx and stvx (Altivec). In PPCInstrVSX.td, these intrinsics are tied to the code gen patterns, with additional patterns to allow plain vanilla loads and stores to still generate these instructions. At -O1 and higher the intrinsics are immediately converted to loads and stores in InstCombineCalls.cpp. This will open up more optimization opportunities while still allowing the correct instructions to be generated. (Similar code exists for aligned Altivec loads and stores.) The new intrinsics are added to the code that checks for consecutive loads and stores in PPCISelLowering.cpp, as well as to PPCTargetLowering::getTgtMemIntrinsic(). There's a new test to verify the correct instructions are generated. The loads and stores tend to be reordered, so the test just counts their number. It runs at -O2, as it's not very effective to test this at -O0, when many unnecessary loads and stores are generated. I ended up having to modify vsx-fma-m.ll. It turns out this test case is slightly unreliable, but I don't know a good way to prevent problems with it. The xvmaddmdp instructions read and write the same register, which is one of the multiplicands. Commutativity allows either to be chosen. If the FMAs are reordered differently than expected by the test, the register assignment can be different as a result. Hopefully this doesn't change often. There is a companion patch for Clang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221767 91177308-0d34-0410-b5e6-96231b3b80d8
2014-11-12 04:19:40 +00:00
// Vector load.
def int_ppc_vsx_lxvw4x :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_ppc_vsx_lxvd2x :
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
// Vector store.
def int_ppc_vsx_stxvw4x :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrReadWriteArgMem]>;
def int_ppc_vsx_stxvd2x :
Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrReadWriteArgMem]>;
// Vector and scalar maximum.
def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
def int_ppc_vsx_xsmaxdp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmaxdp">;
// Vector and scalar minimum.
def int_ppc_vsx_xvmindp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmindp">;
def int_ppc_vsx_xvminsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvminsp">;
def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
// Vector divide.
def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
}
[PowerPC] Add support for the QPX vector instruction set This adds support for the QPX vector instruction set, which is used by the enhanced A2 cores on the IBM BG/Q supercomputers. QPX vectors are 256 bytes wide, holding 4 double-precision floating-point values. Boolean values, modeled here as <4 x i1> are actually also represented as floating-point values (essentially { -1, 1 } for { false, true }). QPX shares many features with Altivec and VSX, but is distinct from both of them. One major difference is that, instead of adding completely-separate vector registers, QPX vector registers are extensions of the scalar floating-point registers (lane 0 is the corresponding scalar floating-point value). The operations supported on QPX vectors mirrors that supported on the scalar floating-point values (with some additional ones for permutations and logical/comparison operations). I've been maintaining this support out-of-tree, as part of the bgclang project, for several years. This is not the entire bgclang patch set, but is most of the subset that can be cleanly integrated into LLVM proper at this time. Adding this to the LLVM backend is part of my efforts to rebase bgclang to the current LLVM trunk, but is independently useful (especially for codes that use LLVM as a JIT in library form). The assembler/disassembler test coverage is complete. The CodeGen test coverage is not, but I've included some tests, and more will be added as follow-up work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230413 91177308-0d34-0410-b5e6-96231b3b80d8
2015-02-25 01:06:45 +00:00
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsics.
//
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
/// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
: GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
}
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Class Definitions.
//
/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
/// vector and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64.
class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64 permutation.
class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and stores a v4f64.
class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[], [llvm_v4f64_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Add Instructions
def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
// Estimate Instructions
def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
// Multiply Instructions
def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
// Multiply-add instructions
def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
// Select Instruction
def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
// Permute Instruction
def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
// Convert and Round Instructions
def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
// Move Instructions
def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
// Compare Instructions
def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
// Load instructions
def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
// Store instructions
def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
// Logical and permutation formation
def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
[llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// PowerPC HTM Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_tend : GCCBuiltin<"__builtin_tend">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_tabortwc : GCCBuiltin<"__builtin_tabortwc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_ppc_tabortwci : GCCBuiltin<"__builtin_tabortwci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_ppc_tabortdc : GCCBuiltin<"__builtin_tabortdc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_ppc_tabortdci : GCCBuiltin<"__builtin_tabortdci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_ppc_tsr : GCCBuiltin<"__builtin_tsr">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_ppc_get_texasru : GCCBuiltin<"__builtin_get_texasru">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_ppc_get_tfhar : GCCBuiltin<"__builtin_get_tfhar">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_ppc_get_tfiar : GCCBuiltin<"__builtin_get_tfiar">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_ppc_set_texasr : GCCBuiltin<"__builtin_set_texasr">,
Intrinsic<[], [llvm_i64_ty], []>;
def int_ppc_set_texasru : GCCBuiltin<"__builtin_set_texasru">,
Intrinsic<[], [llvm_i64_ty], []>;
def int_ppc_set_tfhar : GCCBuiltin<"__builtin_set_tfhar">,
Intrinsic<[], [llvm_i64_ty], []>;
def int_ppc_set_tfiar : GCCBuiltin<"__builtin_set_tfiar">,
Intrinsic<[], [llvm_i64_ty], []>;
// Extended mnemonics
def int_ppc_tendall : GCCBuiltin<"__builtin_tendall">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_ppc_tresume : GCCBuiltin<"__builtin_tresume">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
Intrinsic<[llvm_i64_ty], [], []>;
}