From f8d179ba76285e728702e15552b3b857471e4e92 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 25 Feb 2015 01:06:45 +0000 Subject: [PATCH] [PowerPC] Add support for the QPX vector instruction set This adds support for the QPX vector instruction set, which is used by the enhanced A2 cores on the IBM BG/Q supercomputers. QPX vectors are 256 bits wide, holding 4 double-precision floating-point values. Boolean values, modeled here as <4 x i1>, are actually also represented as floating-point values (essentially { -1, 1 } for { false, true }). QPX shares many features with Altivec and VSX, but is distinct from both of them. One major difference is that, instead of adding completely-separate vector registers, QPX vector registers are extensions of the scalar floating-point registers (lane 0 is the corresponding scalar floating-point value). The operations supported on QPX vectors mirror those supported on the scalar floating-point values (with some additional ones for permutations and logical/comparison operations). I've been maintaining this support out-of-tree, as part of the bgclang project, for several years. This is not the entire bgclang patch set, but is most of the subset that can be cleanly integrated into LLVM proper at this time. Adding this to the LLVM backend is part of my efforts to rebase bgclang to the current LLVM trunk, but is independently useful (especially for codes that use LLVM as a JIT in library form). The assembler/disassembler test coverage is complete. The CodeGen test coverage is not, but I've included some tests, and more will be added as follow-up work. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230413 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsPowerPC.td | 177 +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 26 + .../PowerPC/Disassembler/PPCDisassembler.cpp | 29 + .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 23 +- .../PowerPC/InstPrinter/PPCInstPrinter.h | 1 + lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 + lib/Target/PowerPC/PPCCallingConv.td | 40 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 2 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 17 + lib/Target/PowerPC/PPCISelLowering.cpp | 1145 +++++++++++++++- lib/Target/PowerPC/PPCISelLowering.h | 30 +- lib/Target/PowerPC/PPCInstrFormats.td | 92 ++ lib/Target/PowerPC/PPCInstrInfo.cpp | 42 + lib/Target/PowerPC/PPCInstrInfo.td | 45 + lib/Target/PowerPC/PPCInstrQPX.td | 1192 +++++++++++++++++ lib/Target/PowerPC/PPCRegisterInfo.cpp | 3 + lib/Target/PowerPC/PPCRegisterInfo.td | 23 + lib/Target/PowerPC/PPCSubtarget.cpp | 9 +- lib/Target/PowerPC/PPCSubtarget.h | 13 + lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9 +- test/CodeGen/PowerPC/qpx-bv-sint.ll | 33 + test/CodeGen/PowerPC/qpx-bv.ll | 37 + test/CodeGen/PowerPC/qpx-func-clobber.ll | 21 + test/CodeGen/PowerPC/qpx-load.ll | 25 + test/CodeGen/PowerPC/qpx-recipest.ll | 194 +++ test/CodeGen/PowerPC/qpx-rounding-ops.ll | 109 ++ test/CodeGen/PowerPC/qpx-s-load.ll | 25 + test/CodeGen/PowerPC/qpx-s-sel.ll | 143 ++ test/CodeGen/PowerPC/qpx-s-store.ll | 24 + test/CodeGen/PowerPC/qpx-sel.ll | 151 +++ test/CodeGen/PowerPC/qpx-store.ll | 24 + test/CodeGen/PowerPC/qpx-unalperm.ll | 64 + test/CodeGen/PowerPC/vsx-infl-copy2.ll | 1 - test/MC/Disassembler/PowerPC/qpx.txt | 383 ++++++ test/MC/PowerPC/qpx.s | 251 ++++ 35 files changed, 4336 insertions(+), 68 deletions(-) create mode 100644 lib/Target/PowerPC/PPCInstrQPX.td create mode 100644 test/CodeGen/PowerPC/qpx-bv-sint.ll create mode 100644 test/CodeGen/PowerPC/qpx-bv.ll create mode 100644 test/CodeGen/PowerPC/qpx-func-clobber.ll create mode 
100644 test/CodeGen/PowerPC/qpx-load.ll create mode 100644 test/CodeGen/PowerPC/qpx-recipest.ll create mode 100644 test/CodeGen/PowerPC/qpx-rounding-ops.ll create mode 100644 test/CodeGen/PowerPC/qpx-s-load.ll create mode 100644 test/CodeGen/PowerPC/qpx-s-sel.ll create mode 100644 test/CodeGen/PowerPC/qpx-s-store.ll create mode 100644 test/CodeGen/PowerPC/qpx-sel.ll create mode 100644 test/CodeGen/PowerPC/qpx-store.ll create mode 100644 test/CodeGen/PowerPC/qpx-unalperm.ll create mode 100644 test/MC/Disassembler/PowerPC/qpx.txt create mode 100644 test/MC/PowerPC/qpx.s diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 5cdabdeadae..110d55d562a 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -542,3 +542,180 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">; def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">; def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">; } + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsics. +// + +let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". + /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics. + class PowerPC_QPX_Intrinsic ret_types, + list param_types, + list properties> + : GCCBuiltin, + Intrinsic; +} + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsic Class Definitions. +// + +/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64 +/// vector and returns one. These intrinsics have no side effects. +class PowerPC_QPX_FF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64 +/// vectors and returns one. These intrinsics have no side effects. 
+class PowerPC_QPX_FFF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_QPX_FFFF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and returns a v4f64. +class PowerPC_QPX_Load_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and returns a v4f64 permutation. +class PowerPC_QPX_LoadPerm_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and stores a v4f64. +class PowerPC_QPX_Store_Intrinsic + : PowerPC_QPX_Intrinsic; + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsic Definitions. + +let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". + // Add Instructions + def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">; + def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">; + def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">; + def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">; + + // Estimate Instructions + def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">; + def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">; + def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">; + def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">; + + // Multiply Instructions + def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">; + def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">; + def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">; + def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">; + + // Multiply-add instructions + def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">; + def int_ppc_qpx_qvfmadds : 
PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">; + def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">; + def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">; + def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">; + def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">; + def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">; + def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">; + def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">; + def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">; + def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">; + def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">; + def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">; + def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">; + def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">; + def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">; + + // Select Instruction + def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">; + + // Permute Instruction + def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">; + + // Convert and Round Instructions + def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">; + def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">; + def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">; + def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">; + def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">; + def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">; + def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">; + def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">; + def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">; + def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">; + def int_ppc_qpx_qvfcfids : 
PowerPC_QPX_FF_Intrinsic<"qvfcfids">; + def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">; + def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">; + def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">; + def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">; + def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">; + def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">; + + // Move Instructions + def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">; + def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">; + def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">; + def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">; + + // Compare Instructions + def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">; + def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">; + def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">; + def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">; + + // Load instructions + def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">; + def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">; + def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">; + def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">; + + def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">; + def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">; + def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">; + def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">; + def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">; + def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">; + def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">; + def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">; + + def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">; + def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">; + def int_ppc_qpx_qvlpcrd : 
PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">; + def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">; + + // Store instructions + def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">; + def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">; + def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">; + def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">; + + def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">; + def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">; + def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">; + def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">; + def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">; + def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">; + + // Logical and permutation formation + def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical", + [llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci", + [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>; +} + diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index cd36e58b78d..bf00e7397be 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -132,6 +132,16 @@ static const MCPhysReg VSFRegs[64] = { PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 }; +static unsigned QFRegs[32] = { + PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, + PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, + PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, + PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, + PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, + PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, + PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 +}; static const MCPhysReg CRBITRegs[32] = { PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, 
PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, @@ -429,6 +439,7 @@ public: bool isU8ImmX8() const { return Kind == Immediate && isUInt<8>(getImm()) && (getImm() & 7) == 0; } + bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } bool isU16Imm() const { switch (Kind) { case Expression: @@ -564,6 +575,21 @@ public: Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); } + void addRegQFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + + void addRegQSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + + void addRegQBRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 5251b60f348..0ed07239327 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -164,6 +164,17 @@ static const unsigned G8Regs[] = { PPC::X28, PPC::X29, PPC::X30, PPC::X31 }; +static const unsigned QFRegs[] = { + PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, + PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, + PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, + PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, + PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, + PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, + PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 +}; + template static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, const unsigned (&Regs)[N]) { @@ 
-235,6 +246,15 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass +static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, QFRegs); +} + +#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass +#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { @@ -335,6 +355,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint32_t Inst = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0); + if ((STI.getFeatureBits() & PPC::FeatureQPX) != 0) { + DecodeStatus result = + decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI); + if (result != MCDisassembler::Fail) + return result; + + MI.clear(); + } + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 670c40a2a3b..c287fbe7c5b 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -34,7 +34,20 @@ FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), #include "PPCGenAsmWriter.inc" void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); + const char *RegName = getRegisterName(RegNo); + if (RegName[0] == 'q' /* QPX */) { + // The system toolchain on the BG/Q does not understand QPX register names + // in .cfi_* directives, so print the name of the floating-point + // subregister instead. 
+ std::string RN(RegName); + + RN[0] = 'f'; + OS << RN; + + return; + } + + OS << RegName; } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -236,6 +249,13 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned short Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 4095 && "Invalid u12imm argument!"); + O << (unsigned short)Value; +} + void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) @@ -338,6 +358,7 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': + case 'q': // for QPX case 'v': if (RegName[1] == 's') return RegName + 2; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index b21aa22daa1..6ead19b33fe 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -48,6 +48,7 @@ public: void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 480b790a99b..13272908b12 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -151,6 +151,7 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': + case 'q': // for 
QPX case 'v': if (RegName[1] == 's') return RegName + 2; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 3eaec6ba54d..045fca3c747 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -55,13 +55,17 @@ def RetCC_PPC : CallingConv<[ // only the ELFv2 ABI fully utilizes all these registers. CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - + + // QPX vectors are returned in QF1 and QF2. + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, - CCIfType<[v2f64, v2i64], - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> ]>; // No explicit register is specified for the AnyReg calling convention. 
The @@ -108,10 +112,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, - CCIfType<[v2f64, v2i64], - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> ]>; //===----------------------------------------------------------------------===// @@ -144,6 +150,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // alignment and size as doubles. CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + // QPX vectors that are stored in double precision need 32-byte alignment. + CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>, + // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>> ]>; @@ -158,12 +167,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[ // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to // put vector arguments in vector registers before putting them on the stack. def CC_PPC32_SVR4 : CallingConv<[ + // QPX vectors mirror the scalar FP convention. + CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", + CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>, + // The first 12 Vector arguments are passed in AltiVec registers. 
- CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCIfType<[v2f64, v2i64], + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, + V10, V11, V12, V13]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, - VSH10, VSH11, VSH12, VSH13]>>, + VSH10, VSH11, VSH12, VSH13]>>>, CCDelegateTo ]>; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 10429db9b90..f997fea4d93 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -83,7 +83,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0), + STI.getPlatformStackAlignment(), 0), Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), TOCSaveOffset(computeTOCSaveOffset(Subtarget)), FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 2418ca6b19a..0d553d32f31 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2293,6 +2293,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. 
if (LHS.getValueType().isVector()) { + if (PPCSubTarget->hasQPX()) + return nullptr; + EVT VecVT = LHS.getValueType(); bool Swap, Negate; unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, @@ -2468,6 +2471,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); + case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX + case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX case MVT::f64: Opcode = PPC::LFDUX; break; case MVT::f32: Opcode = PPC::LFSUX; break; case MVT::i32: Opcode = PPC::LWZUX; break; @@ -2711,6 +2716,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SelectCCOp = PPC::SELECT_CC_VSFRC; else SelectCCOp = PPC::SELECT_CC_F8; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) + SelectCCOp = PPC::SELECT_CC_QFRC; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) + SelectCCOp = PPC::SELECT_CC_QSRC; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) + SelectCCOp = PPC::SELECT_CC_QBRC; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) SelectCCOp = PPC::SELECT_CC_VSRC; @@ -3406,6 +3417,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: + case PPC::SELECT_QFRC: + case PPC::SELECT_QSRC: + case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSRC: { @@ -3713,6 +3727,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: + case PPC::SELECT_QFRC: + case PPC::SELECT_QSRC: + case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSRC: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7346dff8602..bb0eb399529 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ 
b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -610,6 +610,162 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); } + if (Subtarget.hasQPX()) { + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FREM, MVT::v4f64, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f64, Custom); + setOperationAction(ISD::STORE , MVT::v4f64, Custom); + + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f64, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); + + setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); + setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + + setOperationAction(ISD::FNEG , MVT::v4f64, Legal); + setOperationAction(ISD::FABS , MVT::v4f64, Legal); + setOperationAction(ISD::FSIN , MVT::v4f64, Expand); + setOperationAction(ISD::FCOS , MVT::v4f64, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); + setOperationAction(ISD::FPOW , MVT::v4f64, Expand); + 
setOperationAction(ISD::FLOG , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); + + setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); + + addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); + + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FREM, MVT::v4f32, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f32, Custom); + setOperationAction(ISD::STORE , MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f32, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); + + setOperationAction(ISD::FNEG , MVT::v4f32, Legal); + setOperationAction(ISD::FABS , MVT::v4f32, Legal); + setOperationAction(ISD::FSIN , MVT::v4f32, Expand); + setOperationAction(ISD::FCOS , MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); + 
setOperationAction(ISD::FPOW , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); + + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); + + addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); + + setOperationAction(ISD::AND , MVT::v4i1, Legal); + setOperationAction(ISD::OR , MVT::v4i1, Legal); + setOperationAction(ISD::XOR , MVT::v4i1, Legal); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4i1, Expand); + setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); + + setOperationAction(ISD::LOAD , MVT::v4i1, Custom); + setOperationAction(ISD::STORE , MVT::v4i1, Custom); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); + + addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); + + setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); + setOperationAction(ISD::FROUND, MVT::v4f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, 
MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); + + // These need to set FE_INEXACT, and so cannot be vectorized here. + setOperationAction(ISD::FRINT, MVT::v4f64, Expand); + setOperationAction(ISD::FRINT, MVT::v4f32, Expand); + + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + } else { + setOperationAction(ISD::FDIV, MVT::v4f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); + + setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + } + } + if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); @@ -621,8 +777,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setBooleanContents(ZeroOrOneBooleanContent); - // Altivec instructions set fields to all zeros or all ones. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + if (Subtarget.hasAltivec()) { + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + } if (!isPPC64) { // These libcalls are not available in 32-bit. 
@@ -851,12 +1010,22 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; + case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; + case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; + case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; + case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; + case PPCISD::QBFLT: return "PPCISD::QBFLT"; + case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; } } -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; + + if (Subtarget.hasQPX()) + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + return VT.changeVectorElementTypeToInteger(); } @@ -1242,6 +1411,36 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { return SDValue(); } +/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift +/// amount, otherwise return -1. +int PPC::isQVALIGNIShuffleMask(SDNode *N) { + EVT VT = N->getValueType(0); + if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) + return -1; + + ShuffleVectorSDNode *SVOp = cast(N); + + // Find the first non-undef value in the shuffle mask. + unsigned i; + for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) + /*search*/; + + if (i == 4) return -1; // all undef. + + // Otherwise, check to see if the rest of the elements are consecutively + // numbered from this value. + unsigned ShiftAmt = SVOp->getMaskElt(i); + if (ShiftAmt < i) return -1; + ShiftAmt -= i; + + // Check the rest of the elements to see if they are consecutive. 
+ for (++i; i != 4; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) + return -1; + + return ShiftAmt; +} + //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// @@ -1501,9 +1700,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } else return false; - // PowerPC doesn't have preinc load/store instructions for vectors. - if (VT.isVector()) - return false; + // PowerPC doesn't have preinc load/store instructions for vectors (except + // for QPX, which does have preinc r+r forms). + if (VT.isVector()) { + if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { + return false; + } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } + } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { @@ -2240,6 +2446,17 @@ static const MCPhysReg *GetFPR() { return FPR; } +/// GetQFPR - Get the set of QPX registers that should be allocated for +/// arguments. +static const MCPhysReg *GetQFPR() { + static const MCPhysReg QFPR[] = { + PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13 + }; + + return QFPR; +} + /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, @@ -2268,6 +2485,10 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) Align = 16; + // QPX vector types stored in double-precision are padded to a 32 byte + // boundary. + else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) + Align = 32; // ByVal parameters are aligned as requested. 
if (Flags.isByVal()) { @@ -2306,7 +2527,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, - unsigned &AvailableVRs) { + unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. @@ -2330,7 +2551,11 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { - if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || + // QPX registers overlap with the scalar FP registers. + (HasQPX && (ArgVT == MVT::v4f32 || + ArgVT == MVT::v4f64 || + ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; @@ -2464,13 +2689,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: - case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v4f32: + RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; + break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; + case MVT::v4f64: + RC = &PPC::QFRCRegClass; + break; + case MVT::v4i1: + RC = &PPC::QBRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. 
@@ -2658,9 +2891,12 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; + static const MCPhysReg *QFPR = GetQFPR(); + const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); + const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area @@ -2676,7 +2912,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( for (unsigned i = 0, e = Ins.size(); i != e; ++i) if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, - NumBytes, AvailableFPRs, AvailableVRs)) + NumBytes, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) HasParameterArea = true; // Add DAG nodes to load the arguments or copy them out of registers. On @@ -2685,6 +2922,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -2908,6 +3146,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. @@ -2926,6 +3165,36 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; + } // not QPX + + assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && + "Invalid QPX parameter type"); + /* fall through */ + + case MVT::v4f64: + case MVT::v4i1: + // QPX vectors are treated like their scalar floating-point subregisters + // (except that they're larger). 
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; + if (QFPR_idx != Num_QFPR_Regs) { + const TargetRegisterClass *RC; + switch (ObjectVT.getSimpleVT().SimpleTy) { + case MVT::v4f64: RC = &PPC::QFRCRegClass; break; + case MVT::v4f32: RC = &PPC::QSRCRegClass; break; + default: RC = &PPC::QBRCRegClass; break; + } + + unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++QFPR_idx; + } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; + } + if (CallConv != CallingConv::Fast || needsLoad) + ArgOffset += Sz; + break; } // We need to load the argument to a virtual register if we determined @@ -4306,6 +4575,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, @@ -4322,9 +4592,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; + static const MCPhysReg *QFPR = GetQFPR(); + const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); + const unsigned NumQFPRs = NumFPRs; // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. 
@@ -4348,12 +4621,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (++NumGPRsUsed <= NumGPRs) continue; break; - case MVT::f32: - case MVT::f64: - if (++NumFPRsUsed <= NumFPRs) - continue; - break; - case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: @@ -4362,6 +4629,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (++NumVRsUsed <= NumVRs) continue; break; + case MVT::v4f32: + // When using QPX, this is handled like a FP register, otherwise, it + // is an Altivec register. + if (Subtarget.hasQPX()) { + if (++NumFPRsUsed <= NumFPRs) + continue; + } else { + if (++NumVRsUsed <= NumVRs) + continue; + } + break; + case MVT::f32: + case MVT::f64: + case MVT::v4f64: // QPX + case MVT::v4i1: // QPX + if (++NumFPRsUsed <= NumFPRs) + continue; + break; } } @@ -4703,6 +4988,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. @@ -4766,6 +5052,60 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (CallConv != CallingConv::Fast) ArgOffset += 16; break; + } // not QPX + + assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && + "Invalid QPX parameter type"); + + /* fall through */ + case MVT::v4f64: + case MVT::v4i1: { + bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; + if (isVarArg) { + // We could elide this store in the case where the object fits + // entirely in R registers. Maybe later. + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + if (QFPR_idx != NumQFPRs) { + SDValue Load = DAG.getLoad(IsF32 ? 
MVT::v4f32 : MVT::v4f64, dl, + Store, PtrOff, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); + } + ArgOffset += (IsF32 ? 16 : 32); + for (unsigned i=0; i<(IsF32 ? 16 : 32); i+=PtrByteSize) { + if (GPR_idx == NumGPRs) + break; + SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, + DAG.getConstant(i, PtrVT)); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + break; + } + + // Non-varargs QPX params go into registers or on the stack. + if (QFPR_idx != NumQFPRs) { + RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); + } else { + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, true, MemOpChains, + TailCallArguments, dl); + if (CallConv == CallingConv::Fast) + ArgOffset += (IsF32 ? 16 : 32); + } + + if (CallConv != CallingConv::Fast) + ArgOffset += (IsF32 ? 
16 : 32); + break; + } } } @@ -5384,6 +5724,9 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, } SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isVector()) + return LowerVectorLoad(Op, DAG); + assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 loads"); @@ -5405,6 +5748,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(1).getValueType().isVector()) + return LowerVectorStore(Op, DAG); + assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); @@ -5674,6 +6020,29 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + + if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { + if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) + return SDValue(); + + SDValue Value = Op.getOperand(0); + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + if (Op.getValueType() != MVT::v4f64) + Value = DAG.getNode(ISD::FP_ROUND, dl, + Op.getValueType(), Value, DAG.getIntPtrConstant(1)); + return Value; + } + // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); @@ -6125,6 +6494,127 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { + // We first build an i32 vector, load it into a QPX register, + // then convert it to a floating-point vector and compare it + // to a zero vector to get the boolean result. + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + assert(BVN->getNumOperands() == 4 && + "BUILD_VECTOR for v4i1 does not have 4 operands"); + + bool IsConst = true; + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (!isa(BVN->getOperand(i))) { + IsConst = false; + break; + } + } + + if (IsConst) { + Constant *One = + ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); + Constant *NegOne = + ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); + + SmallVector CV(4, NegOne); + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); + else if (cast(BVN->getOperand(i))-> + getConstantIntValue()->isZero()) + continue; + else + CV[i] = One; + } + + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), + 16 /* alignment */); + + SmallVector Ops; + Ops.push_back(DAG.getEntryNode()); + Ops.push_back(CPIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::v4i1); + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + return DAG.getMemIntrinsicNode(PPCISD::QVLFSb, + dl, VTs, Ops, 
MVT::v4f32, + MachinePointerInfo::getConstantPool()); + } + + SmallVector Stores; + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = 4*i; + SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); + if (StoreSize > 4) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + BVN->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + MVT::i32, false, false, 0)); + } else { + SDValue StoreValue = BVN->getOperand(i); + if (StoreSize < 4) + StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + StoreValue, Idx, + PtrInfo.getWithOffset(Offset), + false, false, 0)); + } + } + + SDValue StoreChain; + if (!Stores.empty()) + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + else + StoreChain = DAG.getEntryNode(); + + // Now load from v4i32 into the QPX register; this will extend it to + // v4i64 but not yet convert it to a floating point. Nevertheless, this + // is typed as v4f64 because the QPX register integer states are not + // explicitly represented. 
+ + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32)); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::v4f64); + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32), + LoadedVect); + + SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64); + FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPZeros, FPZeros, FPZeros, FPZeros); + + return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); + } + + // All other QPX vectors are handled by generic code. + if (Subtarget.hasQPX()) + return SDValue(); + // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; @@ -6383,6 +6873,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + if (Subtarget.hasQPX()) { + if (VT.getVectorNumElements() != 4) + return SDValue(); + + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); + if (AlignIdx != -1) { + return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, + DAG.getConstant(AlignIdx, MVT::i32)); + } else if (SVOp->isSplat()) { + int SplatIdx = SVOp->getSplatIndex(); + if (SplatIdx >= 4) { + std::swap(V1, V2); + SplatIdx -= 4; + } + + // FIXME: If SplatIdx == 0 and the input came from a load, then there is + // nothing to do. + + return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, + DAG.getConstant(SplatIdx, MVT::i32)); + } + + // Lower this into a qvgpci/qvfperm pair. + + // Compute the qvgpci literal + unsigned idx = 0; + for (unsigned i = 0; i < 4; ++i) { + int m = SVOp->getMaskElt(i); + unsigned mm = m >= 0 ? 
(unsigned) m : i; + idx |= mm << (3-i)*3; + } + + SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, + DAG.getConstant(idx, MVT::i32)); + return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); + } + // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. @@ -6665,6 +7194,302 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, false, 0); } +SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDNode *N = Op.getNode(); + + assert(N->getOperand(0).getValueType() == MVT::v4i1 && + "Unknown extract_vector_elt type"); + + SDValue Value = N->getOperand(0); + + // The first part of this is like the store lowering except that we don't + // need to track the chain. + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. + SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. 
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue StoreChain = DAG.getEntryNode(); + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Extract the value requested. + unsigned Offset = 4*cast(N->getOperand(1))->getZExtValue(); + SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset), + false, false, false, 0); + + if (!Subtarget.useCRBits()) + return IntVal; + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); +} + +/// Lowering for QPX v4i1 loads +SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + LoadSDNode *LN = cast(Op.getNode()); + SDValue LoadChain = LN->getChain(); + SDValue BasePtr = LN->getBasePtr(); + + if (Op.getValueType() == MVT::v4f64 || + Op.getValueType() == MVT::v4f32) { + EVT MemVT = LN->getMemoryVT(); + unsigned Alignment = LN->getAlignment(); + + // If this load is properly aligned, then it is legal. 
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Op.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Vals, LoadChains; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Load; + if (ScalarVT != ScalarMemVT) + Load = + DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, + BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + else + Load = + DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + + if (Idx == 0 && LN->isIndexed()) { + assert(LN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector load"); + Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), + LN->getAddressingMode()); + } + + Vals.push_back(Load); + LoadChains.push_back(Load.getValue(1)); + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getValueType(), Vals); + + if (LN->isIndexed()) { + SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; + return DAG.getMergeValues(RetOps, dl); + } + + SDValue RetOps[] = { Value, TF }; + return DAG.getMergeValues(RetOps, dl); + } + + assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); + assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); + + // To lower v4i1 from a byte array, we load the byte elements of the + // vector and then reuse the BUILD_VECTOR logic. 
+ + SmallVector VectElmts, VectElmtChains; + for (unsigned i = 0; i < 4; ++i) { + SDValue Idx = DAG.getConstant(i, BasePtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); + + VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, + dl, MVT::i32, LoadChain, Idx, + LN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), + 1 /* alignment */, LN->getAAInfo())); + VectElmtChains.push_back(VectElmts[i].getValue(1)); + } + + LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); + + SDValue RVals[] = { Value, LoadChain }; + return DAG.getMergeValues(RVals, dl); +} + +/// Lowering for QPX v4i1 stores +SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + StoreSDNode *SN = cast(Op.getNode()); + SDValue StoreChain = SN->getChain(); + SDValue BasePtr = SN->getBasePtr(); + SDValue Value = SN->getValue(); + + if (Value.getValueType() == MVT::v4f64 || + Value.getValueType() == MVT::v4f32) { + EVT MemVT = SN->getMemoryVT(); + unsigned Alignment = SN->getAlignment(); + + // If this store is properly aligned, then it is legal. 
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Value.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Stores; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Ex = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, getVectorIdxTy())); + SDValue Store; + if (ScalarVT != ScalarMemVT) + Store = + DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + else + Store = + DAG.getStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + + if (Idx == 0 && SN->isIndexed()) { + assert(SN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector store"); + Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), + SN->getAddressingMode()); + } + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + Stores.push_back(Store); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + if (SN->isIndexed()) { + SDValue RetOps[] = { TF, Stores[0].getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + } + + return TF; + } + + assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); + assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. + SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. + Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Move data into the byte array. 
+ SmallVector Loads, LoadChains; + for (unsigned i = 0; i < 4; ++i) { + unsigned Offset = 4*i; + SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset), + false, false, false, 0)); + LoadChains.push_back(Loads[i].getValue(1)); + } + + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + + SmallVector Stores; + for (unsigned i = 0; i < 4; ++i) { + SDValue Idx = DAG.getConstant(i, BasePtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); + + Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx, + SN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, + SN->isNonTemporal(), SN->isVolatile(), + 1 /* alignment */, SN->getAAInfo())); + } + + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + return StoreChain; +} + SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { @@ -6787,6 +7612,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. 
@@ -7411,6 +8237,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_QFRC || + MI->getOpcode() == PPC::SELECT_CC_QSRC || + MI->getOpcode() == PPC::SELECT_CC_QBRC || MI->getOpcode() == PPC::SELECT_CC_VRRC || MI->getOpcode() == PPC::SELECT_CC_VSFRC || MI->getOpcode() == PPC::SELECT_CC_VSRC || @@ -7418,6 +8247,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_QFRC || + MI->getOpcode() == PPC::SELECT_QSRC || + MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || MI->getOpcode() == PPC::SELECT_VSRC) { @@ -7451,6 +8283,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_QFRC || + MI->getOpcode() == PPC::SELECT_QSRC || + MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || MI->getOpcode() == PPC::SELECT_VSRC) { @@ -7866,7 +8701,9 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { + (VT == MVT::v2f64 && Subtarget.hasVSX()) || + (VT == MVT::v4f32 && Subtarget.hasQPX()) || + (VT == MVT::v4f64 && Subtarget.hasQPX())) { // Convergence is quadratic, so we essentially double the number of digits // correct after every iteration. For both FRE and FRSQRTE, the minimum // architected relative accuracy is 2^-5. 
When hasRecipPrec(), this is @@ -7887,7 +8724,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { + (VT == MVT::v2f64 && Subtarget.hasVSX()) || + (VT == MVT::v4f32 && Subtarget.hasQPX()) || + (VT == MVT::v4f64 && Subtarget.hasQPX())) { // Convergence is quadratic, so we essentially double the number of digits // correct after every iteration. For both FRE and FRSQRTE, the minimum // architected relative accuracy is 2^-5. When hasRecipPrec(), this is @@ -7973,6 +8812,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; + case Intrinsic::ppc_qpx_qvlfd: + case Intrinsic::ppc_qpx_qvlfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfs: + case Intrinsic::ppc_qpx_qvlfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcd: + case Intrinsic::ppc_qpx_qvlfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcs: + case Intrinsic::ppc_qpx_qvlfcsa: + VT = MVT::v2f32; + break; + case Intrinsic::ppc_qpx_qvlfiwa: + case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: @@ -7999,6 +8856,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; + case Intrinsic::ppc_qpx_qvstfd: + case Intrinsic::ppc_qpx_qvstfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfs: + case Intrinsic::ppc_qpx_qvstfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcd: + case Intrinsic::ppc_qpx_qvstfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcs: + case Intrinsic::ppc_qpx_qvstfcsa: + VT = MVT::v2f32; + break; + case Intrinsic::ppc_qpx_qvstfiw: + case Intrinsic::ppc_qpx_qvstfiwa: case 
Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: @@ -8927,14 +9802,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return expandVSXLoadForLE(N, DCI); } - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + EVT MemVT = LD->getMemoryVT(); + Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); - if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() && - // P8 and later hardware should just use LOAD. - !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || - VT == MVT::v4i32 || VT == MVT::v4f32) && + Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); + unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy); + if (LD->isUnindexed() && VT.isVector() && + ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && + // P8 and later hardware should just use LOAD. + !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v4f32)) || + (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && + LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { - // This is a type-legal unaligned Altivec load. + // This is a type-legal unaligned Altivec or QPX load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); @@ -8963,10 +9844,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. - Intrinsic::ID Intr = (isLittleEndian ? - Intrinsic::ppc_altivec_lvsr : - Intrinsic::ppc_altivec_lvsl); - SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); + Intrinsic::ID Intr, IntrLD, IntrPerm; + MVT PermCntlTy, PermTy, LDTy; + if (Subtarget.hasAltivec()) { + Intr = isLittleEndian ? 
Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl; + IntrLD = Intrinsic::ppc_altivec_lvx; + IntrPerm = Intrinsic::ppc_altivec_vperm; + PermCntlTy = MVT::v16i8; + PermTy = MVT::v4i32; + LDTy = MVT::v4i32; + } else { + Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : + Intrinsic::ppc_qpx_qvlpcls; + IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : + Intrinsic::ppc_qpx_qvlfs; + IntrPerm = Intrinsic::ppc_qpx_qvfperm; + PermCntlTy = MVT::v4f64; + PermTy = MVT::v4f64; + LDTy = MemVT.getSimpleVT(); + } + + SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered @@ -8975,18 +9874,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = - MF.getMachineMemOperand(LD->getMemOperand(), - -LD->getMemoryVT().getStoreSize()+1, - 2*LD->getMemoryVT().getStoreSize()-1); + MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1, + 2*MemVT.getStoreSize()-1); // Create the new base load. 
- SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx, - getPointerTy()); + SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy()); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, - DAG.getVTList(MVT::v4i32, MVT::Other), - BaseLoadOps, MVT::v4i32, BaseMMO); + DAG.getVTList(PermTy, MVT::Other), + BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the @@ -9010,12 +9907,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), - 1, 2*LD->getMemoryVT().getStoreSize()-1); + 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, - DAG.getVTList(MVT::v4i32, MVT::Other), - ExtraLoadOps, MVT::v4i32, ExtraMMO); + DAG.getVTList(PermTy, MVT::Other), + ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); @@ -9027,14 +9924,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // and ExtraLoad here. SDValue Perm; if (isLittleEndian) - Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else - Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); - if (VT != MVT::v4i32) - Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); + if (VT != PermTy) + Perm = Subtarget.hasAltivec() ? + DAG.getNode(ISD::BITCAST, dl, VT, Perm) : + DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX + DAG.getTargetConstant(1, MVT::i64)); + // second argument is 1 because this rounding + // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. 
@@ -9045,15 +9947,21 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); - if (cast(N->getOperand(0))->getZExtValue() == Intr && - N->getOperand(1)->getOpcode() == ISD::ADD) { + if ((IID == Intr || + IID == Intrinsic::ppc_qpx_qvlpcld || + IID == Intrinsic::ppc_qpx_qvlpcls) && + N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); + int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? + 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; + if (DAG.MaskedValueIsZero( Add->getOperand(1), - APInt::getAllOnesValue(4 /* 16 byte alignment */) + APInt::getAllOnesValue(Bits /* alignment */) .zext( Add.getValueType().getScalarType().getSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); @@ -9061,8 +9969,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast(UI->getOperand(0))->getZExtValue() == - Intr) { + cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. 
@@ -9071,6 +9978,27 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + if (isa(Add->getOperand(1))) { + SDNode *BasePtr = Add->getOperand(0).getNode(); + for (SDNode::use_iterator UI = BasePtr->use_begin(), + UE = BasePtr->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::ADD && + isa(UI->getOperand(1)) && + (cast(Add->getOperand(1))->getZExtValue() - + cast(UI->getOperand(1))->getZExtValue()) % + (1 << Bits) == 0) { + SDNode *OtherAdd = *UI; + for (SDNode::use_iterator VI = OtherAdd->use_begin(), + VE = OtherAdd->use_end(); VI != VE; ++VI) { + if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + cast(VI->getOperand(0))->getZExtValue() == IID) { + return SDValue(*VI, 0); + } + } + } + } + } } } @@ -9521,8 +10449,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); + if (VT == MVT::v4f64 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QFRCRegClass); + if (VT == MVT::v4f32 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QSRCRegClass); break; case 'v': + if (VT == MVT::v4f64 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QFRCRegClass); + if (VT == MVT::v4f32 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QSRCRegClass); return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); @@ -9642,7 +10578,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const { - // FIXME: PPC does not allow r+i addressing modes for vectors! + // PPC does not allow r+i addressing modes for vectors! + if (Ty->isVectorTy() && AM.BaseOffs != 0) + return false; // PPC allows a sign-extended 16-bit immediate field. 
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) @@ -9773,6 +10711,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned Intrinsic) const { switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvlfd: + case Intrinsic::ppc_qpx_qvlfs: + case Intrinsic::ppc_qpx_qvlfcd: + case Intrinsic::ppc_qpx_qvlfcs: + case Intrinsic::ppc_qpx_qvlfiwa: + case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: @@ -9794,6 +10738,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; + case Intrinsic::ppc_qpx_qvlfd: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfs: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcd: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcs: + VT = MVT::v2f32; + break; default: VT = MVT::v4i32; break; @@ -9810,6 +10766,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::ppc_qpx_qvlfda: + case Intrinsic::ppc_qpx_qvlfsa: + case Intrinsic::ppc_qpx_qvlfcda: + case Intrinsic::ppc_qpx_qvlfcsa: + case Intrinsic::ppc_qpx_qvlfiwaa: + case Intrinsic::ppc_qpx_qvlfiwza: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvlfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcsa: + VT = MVT::v2f32; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.size = VT.getStoreSize(); + Info.align = 1; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::ppc_qpx_qvstfd: + case Intrinsic::ppc_qpx_qvstfs: + case Intrinsic::ppc_qpx_qvstfcd: + case Intrinsic::ppc_qpx_qvstfcs: + case 
Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: @@ -9831,6 +10828,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; + case Intrinsic::ppc_qpx_qvstfd: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfs: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcd: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcs: + VT = MVT::v2f32; + break; default: VT = MVT::v4i32; break; @@ -9847,6 +10856,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::ppc_qpx_qvstfda: + case Intrinsic::ppc_qpx_qvstfsa: + case Intrinsic::ppc_qpx_qvstfcda: + case Intrinsic::ppc_qpx_qvstfcsa: + case Intrinsic::ppc_qpx_qvstfiwa: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvstfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcsa: + VT = MVT::v2f32; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.size = VT.getStoreSize(); + Info.align = 1; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + return true; + } default: break; } @@ -10009,6 +11053,11 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( if (VT == MVT::v2i64) return false; + if (Subtarget.hasQPX()) { + if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) + return true; + } + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6e12d9c097a..47d9c68f538 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -283,6 
+283,22 @@ namespace llvm { /// of outputs. XXSWAPD, + /// QVFPERM = This corresponds to the QPX qvfperm instruction. + QVFPERM, + + /// QVGPCI = This corresponds to the QPX qvgpci instruction. + QVGPCI, + + /// QVALIGNI = This corresponds to the QPX qvaligni instruction. + QVALIGNI, + + /// QVESPLATI = This corresponds to the QPX qvesplati instruction. + QVESPLATI, + + /// QBFLT = Access the underlying QPX floating-point boolean + /// representation. + QBFLT, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -332,7 +348,11 @@ namespace llvm { /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. - STXVD2X + STXVD2X, + + /// QBRC, CHAIN = QVLFSb CHAIN, Ptr + /// The 4xf32 load used for v4i1 constants. + QVLFSb }; } @@ -381,6 +401,10 @@ namespace llvm { /// size, return the constant being splatted. The ByteSize field indicates /// the number of bytes of each element [124] -> [bhw]. SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + + /// If this is a qvaligni shuffle mask, return the shift + /// amount, otherwise return -1. 
+ int isQVALIGNIShuffleMask(SDNode *N); } class PPCTargetLowering : public TargetLowering { @@ -679,11 +703,15 @@ namespace llvm { SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 0410b1c7590..506a2d0c7ae 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -562,6 +562,47 @@ class XForm_17 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +// Used for QPX +class XForm_18 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_19 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_18 { + let FRA = 0; +} + +class XForm_20 opcode, bits<6> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<4> tttt; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let 
Inst{21-24} = tttt; + let Inst{25-30} = xo; + let Inst{31} = 0; +} + class XForm_24 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : I { @@ -1215,6 +1256,14 @@ class AForm_4 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +// Used for QPX +class AForm_4a opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : AForm_1 { + let FRA = 0; + let FRC = 0; +} + // 1.7.13 M-Form class MForm_1 opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> @@ -1439,6 +1488,49 @@ class VXRForm_1 xo, dag OOL, dag IOL, string asmstr, let Inst{22-31} = xo; } +// Z23-Form (used by QPX) +class Z23Form_1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<2> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + +class Z23Form_2 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : Z23Form_1 { + let FRB = 0; +} + +class Z23Form_3 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<12> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + //===----------------------------------------------------------------------===// class Pseudo pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d1c60a2e37c..fe9474a5de0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -181,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::RESTORE_CRBIT: case 
PPC::LVX: case PPC::LXVD2X: + case PPC::QVLFDX: + case PPC::QVLFSXs: + case PPC::QVLFDXb: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -207,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::STXVD2X: + case PPC::QVSTFDX: + case PPC::QVSTFSXs: + case PPC::QVSTFDXb: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -759,6 +765,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::XXLOR; else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::XXLORf; + else if (PPC::QFRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMR; + else if (PPC::QSRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMRs; + else if (PPC::QBRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -844,6 +856,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); SpillsVRS = true; + } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -939,6 +969,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, DestReg), FrameIdx)); SpillsVRS = true; + } else if 
(PPC::QFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4e3980dfc9b..c2c53355b6e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -61,6 +61,27 @@ def tocentry32 : Operand { let MIOperandInfo = (ops i32imm:$imm); } +def SDT_PPCqvfperm : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3> +]>; +def SDT_PPCqvgpci : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisInt<1> +]>; +def SDT_PPCqvaligni : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3> +]>; +def SDT_PPCqvesplati : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> +]>; + +def SDT_PPCqbflt : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// @@ -127,6 +148,16 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; +def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; +def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>; +def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>; + +def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>; + +def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb, + [SDNPHasChain, SDNPMayLoad]>; + def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift @@ -464,6 +495,15 @@ def u6imm : Operand { let ParserMatchClass = PPCU6ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<6>"; } +def PPCU12ImmAsmOperand : AsmOperandClass { + let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; + let RenderMethod = "addImmOperands"; +} +def u12imm : Operand { + let PrintMethod = "printU12ImmOperand"; + let ParserMatchClass = PPCU12ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<12>"; +} def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; let RenderMethod = "addS16ImmOperands"; @@ -680,6 +720,10 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; + +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; +def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; + //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
@@ -2643,6 +2687,7 @@ include "PPCInstrAltivec.td" include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" +include "PPCInstrQPX.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td new file mode 100644 index 00000000000..c984d461d25 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -0,0 +1,1192 @@ +//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the QPX extension to the PowerPC instruction set. +// Reference: +// Book Q: QPX Architecture Definition. IBM (as updated in) 2011. +// +//===----------------------------------------------------------------------===// + +def PPCRegQFRCAsmOperand : AsmOperandClass { + let Name = "RegQFRC"; let PredicateMethod = "isRegNumber"; +} +def qfrc : RegisterOperand { + let ParserMatchClass = PPCRegQFRCAsmOperand; +} +def PPCRegQSRCAsmOperand : AsmOperandClass { + let Name = "RegQSRC"; let PredicateMethod = "isRegNumber"; +} +def qsrc : RegisterOperand { + let ParserMatchClass = PPCRegQSRCAsmOperand; +} +def PPCRegQBRCAsmOperand : AsmOperandClass { + let Name = "RegQBRC"; let PredicateMethod = "isRegNumber"; +} +def qbrc : RegisterOperand { + let ParserMatchClass = PPCRegQBRCAsmOperand; +} + +//===----------------------------------------------------------------------===// +// Helpers for defining instructions that directly correspond to intrinsics. + +// QPXA1_Int - A AForm_1 intrinsic definition. +class QPXA1_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1; +// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions). 
+class QPXA1s_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1; +// QPXA2_Int - A AForm_2 intrinsic definition. +class QPXA2_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_2; +// QPXA3_Int - A AForm_3 intrinsic definition. +class QPXA3_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_3; +// QPXA4_Int - A AForm_4a intrinsic definition. +class QPXA4_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_4a; +// QPXX18_Int - A XForm_18 intrinsic definition. +class QPXX18_Int opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_18; +// QPXX19_Int - A XForm_19 intrinsic definition. +class QPXX19_Int opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_19; + +//===----------------------------------------------------------------------===// +// Pattern Frags. + +def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; +def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, + node:$base, node:$offset), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{ + return cast(N->getOperand(1))->getZExtValue() == 0; +}]>; + +def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{ + return cast(N->getOperand(1))->getZExtValue() == 1; +}]>; + +let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs. + def u12 : ImmLeaf; + +//===----------------------------------------------------------------------===// +// Instruction Definitions. + +def HasQPX : Predicate<"PPCSubTarget->hasQPX()">; +let Predicates = [HasQPX] in { +let DecoderNamespace = "QPX" in { +let hasSideEffects = 0 in { // QPX instructions don't have side effects. 
+let Uses = [RM] in { + // Add Instructions + let isCommutable = 1 in { + def QVFADD : AForm_2<4, 21, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>; + def QVFADDSs : AForm_2<0, 21, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>; + } + def QVFSUB : AForm_2<4, 20, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>; + def QVFSUBSs : AForm_2<0, 20, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>; + + // Estimate Instructions + def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfre $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>; + def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>; + let isCodeGenOnly = 1 in + def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfres $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>; + + def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrsqrte $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>; + def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>; + let isCodeGenOnly = 1 in + def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>; + + // Multiply Instructions + let isCommutable = 1 in { + def QVFMUL : AForm_3<4, 25, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC), + "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set 
v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>; + def QVFMULSs : AForm_3<0, 25, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC), + "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>; + } + def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>; + def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>; + + // Multiply-add instructions + def QVFMADD : AForm_1<4, 29, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>; + def QVFMADDSs : AForm_1<0, 29, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>; + def QVFNMADD : AForm_1<4, 31, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>; + def QVFNMADDSs : AForm_1<0, 31, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + v4f32:$FRB)))]>; + def QVFMSUB : AForm_1<4, 28, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>; + def QVFMSUBSs : AForm_1<0, 28, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, 
(fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB)))]>; + def QVFNMSUB : AForm_1<4, 30, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB))))]>; + let isCodeGenOnly = 1 in + def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>; + def QVFNMSUBSs : AForm_1<0, 30, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB))))]>; + def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>; + def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>; + def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>; + def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>; + def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>; + def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>; + def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>; + def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>; + + // Select Instruction + let isCodeGenOnly = 1 in + def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>; + def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT), + (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (vselect v4i1:$FRA, + v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT), + (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (vselect v4i1:$FRA, + v4f32:$FRC, v4f32:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT), + (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4i1:$FRT, (vselect v4i1:$FRA, + v4i1:$FRC, v4i1:$FRB))]>; 
+ + // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after + // instruction selection into a branch sequence. + let usesCustomInserter = 1 in { + def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F, + i32imm:$BROPC), "#SELECT_CC_QFRC", + []>; + def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F, + i32imm:$BROPC), "#SELECT_CC_QSRC", + []>; + def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F, + i32imm:$BROPC), "#SELECT_CC_QBRC", + []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. + def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond, + qfrc:$T, qfrc:$F), "#SELECT_QFRC", + [(set v4f64:$dst, + (select i1:$cond, v4f64:$T, v4f64:$F))]>; + def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond, + qsrc:$T, qsrc:$F), "#SELECT_QSRC", + [(set v4f32:$dst, + (select i1:$cond, v4f32:$T, v4f32:$F))]>; + def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond, + qbrc:$T, qbrc:$F), "#SELECT_QBRC", + [(set v4i1:$dst, + (select i1:$cond, v4i1:$T, v4i1:$F))]>; + } + + // Convert and Round Instructions + def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>; + let isCodeGenOnly = 1 in + def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfctid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>; + def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>; + def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>; + def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>; + def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>; + def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>; + def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>; + def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>; + let isCodeGenOnly = 1 in + def 
QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>; + def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>; + def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>; + + let isCodeGenOnly = 1 in + def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>; + def QVFRSPs : XForm_19<4, 12, + (outs qsrc:$FRT), (ins qfrc:$FRB), + "qvfrsp $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>; + + def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>; + + def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (frnd v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (frnd v4f32:$FRB))]>; + + def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fceil v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fceil v4f32:$FRB))]>; + + def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>; + + // Move Instructions + def QVFMR : XForm_19<4, 72, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfmr $FRT, $FRB", 
IIC_VecPerm, + [/* (set v4f64:$FRT, v4f64:$FRB) */]>; + let isCodeGenOnly = 1 in { + def QVFMRs : XForm_19<4, 72, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4f32:$FRT, v4f32:$FRB) */]>; + def QVFMRb : XForm_19<4, 72, + (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4i1:$FRT, v4i1:$FRB) */]>; + } + def QVFNEG : XForm_19<4, 40, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFNEGs : XForm_19<4, 40, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg v4f32:$FRB))]>; + def QVFABS : XForm_19<4, 264, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fabs v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFABSs : XForm_19<4, 264, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fabs v4f32:$FRB))]>; + def QVFNABS : XForm_19<4, 136, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNABSs : XForm_19<4, 136, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>; + def QVFCPSGN : XForm_18<4, 8, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>; + let isCodeGenOnly = 1 in + def QVFCPSGNs : XForm_18<4, 8, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>; + + def QVALIGNI : Z23Form_1<4, 5, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvaligni v4f64:$FRA, v4f64:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 
in + def QVALIGNIs : Z23Form_1<4, 5, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvaligni v4f32:$FRA, v4f32:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVALIGNIb : Z23Form_1<4, 5, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvaligni v4i1:$FRA, v4i1:$FRB, + (i32 imm:$idx)))]>; + + def QVESPLATI : Z23Form_2<4, 37, + (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIs : Z23Form_2<4, 37, + (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIb : Z23Form_2<4, 37, + (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>; + + def QVFPERM : AForm_1<4, 6, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFPERMs : AForm_1<4, 6, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>; + + let isReMaterializable = 1, isAsCheapAsAMove = 1 in + def QVGPCI : Z23Form_3<4, 133, + (outs qfrc:$FRT), (ins u12imm:$idx), + "qvgpci $FRT, $idx", IIC_VecPerm, + [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>; + + // Compare Instruction + let isCodeGenOnly = 1 in + def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>; + def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + 
"qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>; + def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>; + def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>; + def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>; + + let isCodeGenOnly = 1 in + def QVFLOGICAL : XForm_20<4, 4, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, 
$tttt", IIC_VecPerm, []>; + def QVFLOGICALb : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + let isCodeGenOnly = 1 in + def QVFLOGICALs : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + + // Load indexed instructions + let mayLoad = 1, canFoldAsLoad = 1 in { + def QVLFDX : XForm_1<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", IIC_LdStLFD, + [(set v4f64:$FRT, (load xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFDXb : XForm_1<31, 583, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFDXA : XForm_1<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFDUX : XForm_1<31, 615, + (outs qfrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfdux $FRT, $src", IIC_LdStLFDU, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + let RC = 1 in + def QVLFDUXA : XForm_1<31, 615, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSX : XForm_1<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>; + + let isCodeGenOnly = 1 in + def QVLFSXb : XForm_1<31, 519, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFSXs : XForm_1<31, 519, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4f32:$FRT, (load xoaddr:$src))]>; + + let RC = 1 in + def QVLFSXA : XForm_1<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSUX : XForm_1<31, 551, + (outs qsrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfsux $FRT, $src", IIC_LdStLFDU, []>, + 
RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + let RC = 1 in + def QVLFSUXA : XForm_1<31, 551, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDX : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDXA : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDUX : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDUXA : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSX : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + let isCodeGenOnly = 1 in + def QVLFCSXs : XForm_1<31, 7, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFCSXA : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSUX : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCSUXA : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWAX : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwax $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWAXA : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWZX : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWZXA : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>; + } + + + def QVLPCLDX : XForm_1<31, 582, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcldx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCLSX : XForm_1<31, 518, + (outs 
qfrc:$FRT), (ins memrr:$src), + "qvlpclsx $FRT, $src", IIC_LdStLFD, []>; + let isCodeGenOnly = 1 in + def QVLPCLSXint : XForm_11<31, 518, + (outs qfrc:$FRT), (ins G8RC:$src), + "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>; + def QVLPCRDX : XForm_1<31, 70, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCRSX : XForm_1<31, 6, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>; + + // Store indexed instructions + let mayStore = 1 in { + def QVSTFDX : XForm_8<31, 711, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, + [(store qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFDXb : XForm_8<31, 711, + (outs), (ins qbrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFDXA : XForm_8<31, 711, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFDUXA : XForm_8<31, 743, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDXI : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDXIA : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUXI : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDUXIA : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSX : XForm_8<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFSXs : XForm_8<31, 
647, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(store qsrc:$FRT, xoaddr:$dst)]>; + + let RC = 1 in + def QVSTFSXA : XForm_8<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qsrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + let isCodeGenOnly = 1 in + def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFSUXA : XForm_8<31, 679, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSXI : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSXIA : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUXI : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSUXIA : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDX : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXA : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSX : XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + let isCodeGenOnly = 1 in + def QVSTFCSXs : XForm_8<31, 135, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFCSXA : XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>; + + 
def QVSTFCDUX : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXA : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUX : XForm_8<31, 167, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXA : XForm_8<31, 167, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDXI : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXIA : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSXI : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSXIA : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDUXI : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXIA : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUXI : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXIA : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFIWX : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFIWXA : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>; + } +} + +} // neverHasSideEffects +} + +def : InstAlias<"qvfclr $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>; +def : InstAlias<"qvfand $FRT, $FRA, $FRB", + (QVFLOGICALb 
qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>; +def : InstAlias<"qvfandc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>; +def : InstAlias<"qvfctfb $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>; +def : InstAlias<"qvfxor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>; +def : InstAlias<"qvfor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>; +def : InstAlias<"qvfnor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>; +def : InstAlias<"qvfequ $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>; +def : InstAlias<"qvfnot $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>; +def : InstAlias<"qvforc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>; +def : InstAlias<"qvfnand $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>; +def : InstAlias<"qvfset $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>; + +//===----------------------------------------------------------------------===// +// Additional QPX Patterns +// + +def : Pat<(v4f64 (scalar_to_vector f64:$A)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>; +def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, 1)), + (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>; +def : Pat<(f64 (vector_extract v4f64:$S, 2)), + (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>; +def : Pat<(f64 (vector_extract v4f64:$S, 3)), + (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>; + +def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 3)), + 
(EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERM $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERMs $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; + +def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C), + (QVFPERM $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B), + (QVFCPSGN $A, $B)>; + +// FCOPYSIGN's operand types need not agree. +def : Pat<(fcopysign v4f64:$frB, v4f32:$frA), + (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>; +def : Pat<(fcopysign QSRC:$frB, QFRC:$frA), + (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>; + +def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>; +def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>; +def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>; + +def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>; +def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>; +def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>; +def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>; + +def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>; +def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>; + +def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B), + (QVFADD $A, $B)>; +def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B), + (QVFSUB $A, $B)>; +def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B), + (QVFMUL $A, $B)>; + +// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMADD $A, $B, $C)>; +def : 
Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMADD $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMSUB $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMSUB $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src), + (QVLFDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src), + (QVLFSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src), + (QVLFCDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src), + (QVLFCDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src), + (QVLFCSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src), + (QVLFCSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src), + (QVLFIWAXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src), + (QVLFIWAX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src), + (QVLFIWZXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src), + (QVLFIWZX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src), + (QVLPCLDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src), + (QVLPCLSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src), + (QVLPCRDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src), + (QVLPCRSX xoaddr:$src)>; + +def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst), + (QVSTFDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst), + (QVSTFSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst), + (QVSTFCDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst), + (QVSTFCDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst), + (QVSTFCSXA $T, xoaddr:$dst)>; 
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst), + (QVSTFCSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst), + (QVSTFDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst), + (QVSTFIWXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst), + (QVSTFIWX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst), + (QVSTFSXA $T, xoaddr:$dst)>; + +def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFDUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUXs $rS, $ptrreg, $ptroff)>; + +def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)), + (QVFLOGICAL $A, $B, imm:$idx)>; +def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)), + (QVGPCI imm:$idx)>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE), + 
(QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ), + (QVFCMPEQb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT), + (QVFCMPGTb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT), + (QVFCMPLTb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 10))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 13))>; +def : 
Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ), + (QVFCMPEQbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT), + (QVFCMPGTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT), + (QVFCMPLTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 10))>; + +def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 4))>; +def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 8))>; +def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 9))>; +def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 13))>; +def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 14))>; + +def : Pat<(and v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 1))>; +def : Pat<(or v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 7))>; +def : Pat<(xor v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 6))>; +def : Pat<(not v4i1:$FRA), + (QVFLOGICALb $FRA, $FRA, (i32 10))>; + +def : Pat<(v4f64 (fextend v4f32:$src)), + (COPY_TO_REGCLASS $src, QFRC)>; + +def : Pat<(v4f32 (fround_exact v4f64:$src)), + (COPY_TO_REGCLASS $src, QSRC)>; + +// Extract the underlying floating-point values from the +// QPX (-1.0, 1.0) boolean representation. 
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)), + (COPY_TO_REGCLASS $src, QFRC)>; + +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)), + (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)), + (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)), + (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)), + (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, 
SETEQ)), + (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)), + (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +} // end HasQPX + +let Predicates = [HasQPX, NoNaNsFPMath] in { +def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>; +def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>; + +def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>; +def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>; +} + +let Predicates = [HasQPX, NaNsFPMath] in { +// When either of these operands is NaN, we should return the other operand. +// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need +// to explicitly OR with a NaN test on the second operand. 
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; + +def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +} + diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 41bb11f47a0..c9a96840a9b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -275,6 +275,9 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } case PPC::F8RCRegClassID: case PPC::F4RCRegClassID: + case PPC::QFRCRegClassID: + case PPC::QSRCRegClassID: + case PPC::QBRCRegClassID: case PPC::VRRCRegClassID: case PPC::VFRCRegClassID: case PPC::VSLRCRegClassID: diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 62416bc5d9f..9a7df9615cc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -49,6 +49,13 @@ class FPR num, string n> : PPCReg { let HWEncoding{4-0} = num; } +// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX) +class QFPR : PPCReg { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + // VF - One of the 32 64-bit floating-point subregisters of the vector // registers (used by VSX). 
class VF num, string n> : PPCReg { @@ -114,6 +121,12 @@ foreach Index = 0-31 in { def VF#Index : VF; } +// QPX Floating-point registers +foreach Index = 0-31 in { + def QF#Index : QFPR("F"#Index), "q"#Index>, + DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} + // Vector registers foreach Index = 0-31 in { def V#Index : VR("VF"#Index), "v"#Index>, @@ -303,6 +316,16 @@ def VFRC : RegisterClass<"PPC", [f64], 64, VF22, VF21, VF20)>; def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; +// For QPX +def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13), + (sequence "QF%u", 31, 14))>; +def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>; +def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> { + // These are actually stored as floating-point values where a positive + // number is true and anything else (including NaN) is false. + let Size = 256; +} + def CRBITRC : RegisterClass<"PPC", [i1], 32, (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 8d3d5c4e408..c91428db3a9 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -37,6 +37,10 @@ using namespace llvm; static cl::opt UseSubRegLiveness("ppc-track-subreg-liveness", cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden); +static cl::opt QPXStackUnaligned("qpx-stack-unaligned", + cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"), + cl::Hidden); + PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); @@ -90,6 +94,7 @@ void PPCSubtarget::initializeEnvironment() { HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; + IsQPXStackUnaligned = false; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -126,8 +131,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) 
{ // QPX requires a 32-byte aligned stack. Note that we need to do this if // we're compiling for a BG/Q system regardless of whether or not QPX // is enabled because external functions will assume this alignment. - if (hasQPX() || isBGQ()) - StackAlignment = 32; + IsQPXStackUnaligned = QPXStackUnaligned; + StackAlignment = getPlatformStackAlignment(); // Determine endianness. // FIXME: Part of the TargetMachine. diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 704a226ed33..247a96d405e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -114,6 +114,11 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; + /// When targeting QPX running a stock PPC64 Linux kernel where the stack + /// alignment has not been changed, we need to keep the 16-byte alignment + /// of the stack. + bool IsQPXStackUnaligned; + const PPCTargetMachine &TM; PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; @@ -230,6 +235,14 @@ public: return HasInvariantFunctionDescriptors; } + bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } + unsigned getPlatformStackAlignment() const { + if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) + return 32; + + return 16; + } + const Triple &getTargetTriple() const { return TargetTriple; } /// isDarwin - True if this is any darwin platform. diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index e1d46f72542..073bbb0c556 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -193,13 +193,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, } unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { - if (Vector && !ST->hasAltivec()) + if (Vector && !ST->hasAltivec() && !ST->hasQPX()) return 0; return ST->hasVSX() ? 
64 : 32; } unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { if (Vector) { + if (ST->hasQPX()) return 256; if (ST->hasAltivec()) return 128; return 0; } @@ -276,6 +277,12 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index == 0) return 0; + return BaseT::getVectorInstrCost(Opcode, Val, Index); + } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) { + // Floating point scalars are already located in index #0. + if (Index == 0) + return 0; + return BaseT::getVectorInstrCost(Opcode, Val, Index); } diff --git a/test/CodeGen/PowerPC/qpx-bv-sint.ll b/test/CodeGen/PowerPC/qpx-bv-sint.ll new file mode 100644 index 00000000000..0bc14ed4351 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-bv-sint.ll @@ -0,0 +1,33 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @s452() nounwind { +entry: + br label %for.body4 + +for.body4: ; preds = %for.body4, %entry + %conv.4 = sitofp i32 undef to double + %conv.5 = sitofp i32 undef to double + %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0 + %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1 + %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef + %add7.4 = fadd <2 x double> undef, %mul.4 + store <2 x double> %add7.4, <2 x double>* undef, align 16 + br i1 undef, label %for.end, label %for.body4 + +for.end: ; preds = %for.body4 + unreachable +; CHECK-LABEL: @s452 +; CHECK: lfiwax [[REG1:[0-9]+]], +; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]] +; FIXME: We could 'promote' this to a vector earlier and remove this splat. +; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0 +; CHECK: qvfmul +; CHECK: qvfadd +; CHECK: qvesplati {{[0-9]+}}, +; FIXME: We can use qvstfcdx here instead of two stores. 
+; CHECK: stfd +; CHECK: stfd +} + diff --git a/test/CodeGen/PowerPC/qpx-bv.ll b/test/CodeGen/PowerPC/qpx-bv.ll new file mode 100644 index 00000000000..ae181de383b --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-bv.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) { + %v1 = insertelement <4 x double> undef, double %f1, i32 0 + %v2 = insertelement <4 x double> %v1, double %f2, i32 1 + %v3 = insertelement <4 x double> %v2, double %f3, i32 2 + %v4 = insertelement <4 x double> %v3, double %f4, i32 3 + ret <4 x double> %v4 + +; CHECK-LABEL: @foo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) { + %v1 = insertelement <4 x float> undef, float %f1, i32 0 + %v2 = insertelement <4 x float> %v1, float %f2, i32 1 + %v3 = insertelement <4 x float> %v2, float %f3, i32 2 + %v4 = insertelement <4 x float> %v3, float %f4, i32 3 + ret <4 x float> %v4 + +; CHECK-LABEL: @goo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-func-clobber.ll b/test/CodeGen/PowerPC/qpx-func-clobber.ll new file mode 100644 index 00000000000..c1b808aa7c4 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-func-clobber.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +declare <4 x double> @foo(<4 x double> %p) + 
+define <4 x double> @bar(<4 x double> %p, <4 x double> %q) { +entry: + %v = call <4 x double> @foo(<4 x double> %p) + %w = call <4 x double> @foo(<4 x double> %q) + %x = fadd <4 x double> %v, %w + ret <4 x double> %x + +; CHECK-LABEL: @bar +; CHECK: qvstfdx 2, +; CHECK: bl foo +; CHECK: qvstfdx 1, +; CHECK: qvlfdx 1, +; CHECK: bl foo +; CHECK: qvlfdx [[REG:[0-9]+]], +; CHECK: qvfadd 1, [[REG]], 1 +} + diff --git a/test/CodeGen/PowerPC/qpx-load.ll b/test/CodeGen/PowerPC/qpx-load.ll new file mode 100644 index 00000000000..2eb29081e26 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-load.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define <4 x double> @foo(<4 x double>* %p) { +entry: + %v = load <4 x double>* %p, align 8 + ret <4 x double> %v +} + +; CHECK: @foo +; CHECK-DAG: li [[REG1:[0-9]+]], 31 +; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3 +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]] +; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3 +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]] +; CHECK: blr + +define <4 x double> @bar(<4 x double>* %p) { +entry: + %v = load <4 x double>* %p, align 32 + ret <4 x double> %v +} + +; CHECK: @bar +; CHECK: qvlfdx + diff --git a/test/CodeGen/PowerPC/qpx-recipest.ll b/test/CodeGen/PowerPC/qpx-recipest.ll new file mode 100644 index 00000000000..0e01358e579 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-recipest.ll @@ -0,0 +1,194 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind { +entry: + %x = call 
<4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %r = fdiv <4 x double> %a, %x + ret <4 x double> %r + +; CHECK-LABEL: @foo +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadd instead of a qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind { +entry: + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %y = fpext <4 x float> %x to <4 x double> + %r = fdiv <4 x double> %a, %y + ret <4 x double> %r + +; CHECK-LABEL: @foof +; CHECK: qvfrsqrtes +; CHECK: qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadds instead of a qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foof +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind { +entry: + %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %y = fptrunc <4 x double> %x to <4 x float> + %r = fdiv <4 x float> %a, %y + ret <4 x float> %r + +; CHECK-LABEL: @food +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadd instead of a qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfrsp +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @food +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv <4 x float> %a, %x + ret <4 x float> %r + +; CHECK-LABEL: @goo +; CHECK: qvfrsqrtes +; CHECK: 
qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadds instead of a qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind { +entry: + %r = fdiv <4 x double> %a, %b + ret <4 x double> %r + +; CHECK-LABEL: @foo2 +; CHECK: qvfre +; CHECK: qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo2 +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %r = fdiv <4 x float> %a, %b + ret <4 x float> %r + +; CHECK-LABEL: @goo2 +; CHECK: qvfres +; CHECK: qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo2 +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x double> @foo3(<4 x double> %a) nounwind { +entry: + %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) + ret <4 x double> %r + +; CHECK-LABEL: @foo3 +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadd instead of a qvfnmsub +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfcmpeq +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmul +; CHECK: qvfsel +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo3 +; CHECK-SAFE: fsqrt +; CHECK-SAFE: blr +} + +define <4 x float> @goo3(<4 x float> %a) nounwind { +entry: + %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r + +; CHECK-LABEL: @goo3 +; CHECK: qvfrsqrtes +; CHECK: qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; a qvfmadds instead of a qvfnmsubs +; CHECK-DAG: qvfmadds +; CHECK-DAG: qvfcmpeq +; CHECK-DAG: qvfmadds +; CHECK-DAG: qvfmuls +; CHECK-DAG: 
qvfmuls +; CHECK: qvfsel +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo3 +; CHECK-SAFE: fsqrts +; CHECK-SAFE: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-rounding-ops.ll b/test/CodeGen/PowerPC/qpx-rounding-ops.ll new file mode 100644 index 00000000000..6fdd8e6a714 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-rounding-ops.ll @@ -0,0 +1,109 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define <4 x float> @test1(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test1: +; CHECK: qvfrim 1, 1 + +; CHECK-FM: test1: +; CHECK-FM: qvfrim 1, 1 +} + +declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test2(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test2: +; CHECK: qvfrim 1, 1 + +; CHECK-FM: test2: +; CHECK-FM: qvfrim 1, 1 +} + +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test3(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test3: +; CHECK-NOT: qvfrin + +; CHECK-FM: test3: +; CHECK-FM-NOT: qvfrin +} + +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test4(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test4: +; CHECK-NOT: qvfrin + +; CHECK-FM: test4: +; CHECK-FM-NOT: qvfrin +} + +declare <4 x 
double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test5(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test5: +; CHECK: qvfrip 1, 1 + +; CHECK-FM: test5: +; CHECK-FM: qvfrip 1, 1 +} + +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test6(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test6: +; CHECK: qvfrip 1, 1 + +; CHECK-FM: test6: +; CHECK-FM: qvfrip 1, 1 +} + +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test9(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test9: +; CHECK: qvfriz 1, 1 + +; CHECK-FM: test9: +; CHECK-FM: qvfriz 1, 1 +} + +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test10(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test10: +; CHECK: qvfriz 1, 1 + +; CHECK-FM: test10: +; CHECK-FM: qvfriz 1, 1 +} + +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone + diff --git a/test/CodeGen/PowerPC/qpx-s-load.ll b/test/CodeGen/PowerPC/qpx-s-load.ll new file mode 100644 index 00000000000..8dfab1385dd --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-s-load.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define <4 x float> @foo(<4 x float>* %p) { +entry: + %v = load <4 x float>* %p, align 4 + ret <4 x float> %v +} + +; CHECK: @foo +; CHECK-DAG: li [[REG1:[0-9]+]], 15 +; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3 +; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]] +; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3 +; CHECK-DAG: qvfperm 1, [[REG4]], 
[[REG2]], [[REG3]] +; CHECK: blr + +define <4 x float> @bar(<4 x float>* %p) { +entry: + %v = load <4 x float>* %p, align 16 + ret <4 x float> %v +} + +; CHECK: @bar +; CHECK: qvlfsx + diff --git a/test/CodeGen/PowerPC/qpx-s-sel.ll b/test/CodeGen/PowerPC/qpx-s-sel.ll new file mode 100644 index 00000000000..008efea5da4 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-s-sel.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +@Q = constant <4 x i1> , align 16 +@R = global <4 x i1> , align 16 + +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone { +entry: + %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %r + +; CHECK-LABEL: @test1 +; CHECK: qvfsel 1, 3, 1, 2 +; CHECK: blr +} + +define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone { +entry: + %v = insertelement <4 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3 + %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b + ret <4 x float> %r + +; CHECK-LABEL: @test2 +; CHECK: stw +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + +define <4 x i1> @test3(<4 x i1> %a) nounwind readnone { +entry: + %v = and <4 x i1> %a, + ret <4 x i1> %v + +; CHECK-LABEL: @test3 +; CHECK: qvlfsx [[REG:[0-9]+]], +; qvflogical 1, 1, [[REG]], 1 +; blr +} + +define <4 x i1> @test4(<4 x i1> %a) nounwind { +entry: + %q = load <4 x i1>* @Q, align 16 + %v = and <4 x i1> %a, %q + ret <4 x i1> %v + +; CHECK-LABEL: @test4 +; CHECK-DAG: lbz +; CHECK-DAG: qvlfdx [[REG1:[0-9]+]], +; CHECK-DAG: stw +; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvfcmpeq 
[[REG4:[0-9]+]], [[REG3]], [[REG1]] +; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: blr +} + +define void @test5(<4 x i1> %a) nounwind { +entry: + store <4 x i1> %a, <4 x i1>* @R + ret void + +; CHECK-LABEL: @test5 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: stb +; CHECK: blr +} + +define i1 @test6(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test6 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define i1 @test7(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + %s = extractelement <4 x i1> %a, i32 3 + %q = and i1 %r, %s + ret i1 %q + +; CHECK-LABEL: @test7 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG4:[0-9]+]], +; FIXME: We're storing the vector twice, and that's silly. 
+; CHECK-DAG: qvstfiwx [[REG3]], +; CHECK: lwz [[REG5:[0-9]+]], +; CHECK: and 3, +; CHECK: blr +} + +define i1 @test8(<3 x i1> %a) nounwind { +entry: + %r = extractelement <3 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test8 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone { +entry: + %v = insertelement <3 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2 + %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b + ret <3 x float> %r + +; CHECK-LABEL: @test9 +; CHECK: stw +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-s-store.ll b/test/CodeGen/PowerPC/qpx-s-store.ll new file mode 100644 index 00000000000..d2ca45814da --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-s-store.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @foo(<4 x float> %v, <4 x float>* %p) { +entry: + store <4 x float> %v, <4 x float>* %p, align 4 + ret void +} + +; CHECK: @foo +; CHECK: stfs +; CHECK: stfs +; CHECK: stfs +; CHECK: stfs +; CHECK: blr + +define void @bar(<4 x float> %v, <4 x float>* %p) { +entry: + store <4 x float> %v, <4 x float>* %p, align 16 + ret void +} + +; CHECK: @bar +; CHECK: qvstfsx + diff --git a/test/CodeGen/PowerPC/qpx-sel.ll b/test/CodeGen/PowerPC/qpx-sel.ll new file mode 100644 index 00000000000..15ae57352c3 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-sel.ll @@ -0,0 +1,151 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +@Q = constant <4 x i1> , align 16 +@R = global <4 
x i1> , align 16 + +define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone { +entry: + %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b + ret <4 x double> %r + +; CHECK-LABEL: @test1 +; CHECK: qvfsel 1, 3, 1, 2 +; CHECK: blr +} + +define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone { +entry: + %v = insertelement <4 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3 + %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b + ret <4 x double> %r + +; CHECK-LABEL: @test2 + +; FIXME: This load/store sequence is unnecessary. +; CHECK-DAG: lbz +; CHECK-DAG: stw + +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + +define <4 x i1> @test3(<4 x i1> %a) nounwind readnone { +entry: + %v = and <4 x i1> %a, + ret <4 x i1> %v + +; CHECK-LABEL: @test3 +; CHECK: qvlfsx [[REG:[0-9]+]], +; qvflogical 1, 1, [[REG]], 1 +; blr +} + +define <4 x i1> @test4(<4 x i1> %a) nounwind { +entry: + %q = load <4 x i1>* @Q, align 16 + %v = and <4 x i1> %a, %q + ret <4 x i1> %v + +; CHECK-LABEL: @test4 +; CHECK-DAG: lbz +; CHECK-DAG: qvlfdx [[REG1:[0-9]+]], +; CHECK-DAG: stw +; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]] +; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: blr +} + +define void @test5(<4 x i1> %a) nounwind { +entry: + store <4 x i1> %a, <4 x i1>* @R + ret void + +; CHECK-LABEL: @test5 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: stb +; 
CHECK: blr +} + +define i1 @test6(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test6 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define i1 @test7(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + %s = extractelement <4 x i1> %a, i32 3 + %q = and i1 %r, %s + ret i1 %q + +; CHECK-LABEL: @test7 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG4:[0-9]+]], +; FIXME: We're storing the vector twice, and that's silly. +; CHECK-DAG: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG5:[0-9]+]], +; CHECK: and 3, +; CHECK: blr +} + +define i1 @test8(<3 x i1> %a) nounwind { +entry: + %r = extractelement <3 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test8 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone { +entry: + %v = insertelement <3 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2 + %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b + ret <3 x double> %r + +; CHECK-LABEL: @test9 + +; FIXME: This load/store sequence is unnecessary. 
+; CHECK-DAG: lbz +; CHECK-DAG: stw + +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-store.ll b/test/CodeGen/PowerPC/qpx-store.ll new file mode 100644 index 00000000000..c29cc225b05 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-store.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @foo(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 8 + ret void +} + +; CHECK: @foo +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: blr + +define void @bar(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 32 + ret void +} + +; CHECK: @bar +; CHECK: qvstfdx + diff --git a/test/CodeGen/PowerPC/qpx-unalperm.ll b/test/CodeGen/PowerPC/qpx-unalperm.ll new file mode 100644 index 00000000000..e765b46a7cf --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-unalperm.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 32 + ret <4 x double> %r +; CHECK: qvlfdx +; CHECK: blr +} + +define <4 x double> @bar(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +; CHECK: qvlpcldx +; CHECK: qvlfdx +; CHECK: qvfperm +; CHECK: blr +} + +define <4 x double> @bar1(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x double>* %b, align 8 + %t = fadd 
<4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar2(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar3(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar4(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %c = getelementptr <4 x double>* %b, i32 1 + %t = load <4 x double>* %c, align 8 + %u = fadd <4 x double> %r, %s + %v = fadd <4 x double> %u, %t + ret <4 x double> %v +} + diff --git a/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/test/CodeGen/PowerPC/vsx-infl-copy2.ll index 037473bdec8..0f279067159 100644 --- a/test/CodeGen/PowerPC/vsx-infl-copy2.ll +++ b/test/CodeGen/PowerPC/vsx-infl-copy2.ll @@ -8,7 +8,6 @@ entry: br i1 false, label %loop2_start, label %if.end5 ; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc -; CHECK: xxlor loop2_start: ; preds = %loop2_start, %entry br i1 undef, label %loop2_start, label %if.then.i31 diff --git a/test/MC/Disassembler/PowerPC/qpx.txt b/test/MC/Disassembler/PowerPC/qpx.txt new file mode 100644 index 00000000000..b53bb4c4d37 --- /dev/null +++ b/test/MC/Disassembler/PowerPC/qpx.txt @@ -0,0 +1,383 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s + +# CHECK: qvfabs 3, 5 +0x10 0x60 0x2a 0x10 + +# CHECK: qvfadd 3, 4, 5 +0x10 0x64 0x28 0x2a + +# CHECK: qvfadds 3, 4, 5 +0x00 0x64 0x28 0x2a + +# FIXME: decode as qvfandc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 4 +0x10 0x64 0x2a 0x08 + +# FIXME: decode as qvfand 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 1 +0x10 0x64 0x28 0x88 + +# CHECK: qvfcfid 3, 5 +0x10 0x60 
0x2e 0x9c + +# CHECK: qvfcfids 3, 5 +0x00 0x60 0x2e 0x9c + +# CHECK: qvfcfidu 3, 5 +0x10 0x60 0x2f 0x9c + +# CHECK: qvfcfidus 3, 5 +0x00 0x60 0x2f 0x9c + +# FIXME: decode as qvfclr 3 +# CHECK: qvflogical 3, 3, 3, 0 +0x10 0x63 0x18 0x08 + +# CHECK: qvfcpsgn 3, 4, 5 +0x10 0x64 0x28 0x10 + +# FIXME: decode as qvfctfb 3, 4 +# CHECK: qvflogical 3, 4, 4, 5 +0x10 0x64 0x22 0x88 + +# CHECK: qvfctid 3, 5 +0x10 0x60 0x2e 0x5c + +# CHECK: qvfctidu 3, 5 +0x10 0x60 0x2f 0x5c + +# CHECK: qvfctiduz 3, 5 +0x10 0x60 0x2f 0x5e + +# CHECK: qvfctidz 3, 5 +0x10 0x60 0x2e 0x5e + +# CHECK: qvfctiw 3, 5 +0x10 0x60 0x28 0x1c + +# CHECK: qvfctiwu 3, 5 +0x10 0x60 0x29 0x1c + +# CHECK: qvfctiwuz 3, 5 +0x10 0x60 0x29 0x1e + +# CHECK: qvfctiwz 3, 5 +0x10 0x60 0x28 0x1e + +# FIXME: decode as qvfequ 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 9 +0x10 0x64 0x2c 0x88 + +# CHECK: qvflogical 3, 4, 5, 12 +0x10 0x64 0x2e 0x08 + +# CHECK: qvfmadd 3, 4, 6, 5 +0x10 0x64 0x29 0xba + +# CHECK: qvfmadds 3, 4, 6, 5 +0x00 0x64 0x29 0xba + +# CHECK: qvfmr 3, 5 +0x10 0x60 0x28 0x90 + +# CHECK: qvfmsub 3, 4, 6, 5 +0x10 0x64 0x29 0xb8 + +# CHECK: qvfmsubs 3, 4, 6, 5 +0x00 0x64 0x29 0xb8 + +# CHECK: qvfmul 3, 4, 6 +0x10 0x64 0x01 0xb2 + +# CHECK: qvfmuls 3, 4, 6 +0x00 0x64 0x01 0xb2 + +# CHECK: qvfnabs 3, 5 +0x10 0x60 0x29 0x10 + +# FIXME: decode as qvfnand 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 14 +0x10 0x64 0x2f 0x08 + +# CHECK: qvfneg 3, 5 +0x10 0x60 0x28 0x50 + +# CHECK: qvfnmadd 3, 4, 6, 5 +0x10 0x64 0x29 0xbe + +# CHECK: qvfnmadds 3, 4, 6, 5 +0x00 0x64 0x29 0xbe + +# CHECK: qvfnmsub 3, 4, 6, 5 +0x10 0x64 0x29 0xbc + +# CHECK: qvfnmsubs 3, 4, 6, 5 +0x00 0x64 0x29 0xbc + +# FIXME: decode as qvfnor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 8 +0x10 0x64 0x2c 0x08 + +# FIXME: decode as qvfnot 3, 4 +# CHECK: qvflogical 3, 4, 4, 10 +0x10 0x64 0x25 0x08 + +# FIXME: decode as qvforc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 13 +0x10 0x64 0x2e 0x88 + +# FIXME: decode as qvfor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 7 +0x10 0x64 0x2b 
0x88 + +# CHECK: qvfperm 3, 4, 5, 6 +0x10 0x64 0x29 0x8c + +# CHECK: qvfre 3, 5 +0x10 0x60 0x28 0x30 + +# CHECK: qvfres 3, 5 +0x00 0x60 0x28 0x30 + +# CHECK: qvfrim 3, 5 +0x10 0x60 0x2b 0xd0 + +# CHECK: qvfrin 3, 5 +0x10 0x60 0x2b 0x10 + +# CHECK: qvfrip 3, 5 +0x10 0x60 0x2b 0x90 + +# CHECK: qvfriz 3, 5 +0x10 0x60 0x2b 0x50 + +# CHECK: qvfrsp 3, 5 +0x10 0x60 0x28 0x18 + +# CHECK: qvfrsqrte 3, 5 +0x10 0x60 0x28 0x34 + +# CHECK: qvfrsqrtes 3, 5 +0x00 0x60 0x28 0x34 + +# CHECK: qvfsel 3, 4, 6, 5 +0x10 0x64 0x29 0xae + +# FIXME: decode as qvfset 3 +# CHECK: qvflogical 3, 3, 3, 15 +0x10 0x63 0x1f 0x88 + +# CHECK: qvfsub 3, 4, 5 +0x10 0x64 0x28 0x28 + +# CHECK: qvfsubs 3, 4, 5 +0x00 0x64 0x28 0x28 + +# CHECK: qvfxmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x92 + +# CHECK: qvfxmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x92 + +# CHECK: qvfxmul 3, 4, 6 +0x10 0x64 0x01 0xa2 + +# CHECK: qvfxmuls 3, 4, 6 +0x00 0x64 0x01 0xa2 + +# FIXME: decode as qvfxor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 6 +0x10 0x64 0x2b 0x08 + +# CHECK: qvfxxcpnmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x86 + +# CHECK: qvfxxcpnmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x86 + +# CHECK: qvfxxmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x82 + +# CHECK: qvfxxmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x82 + +# CHECK: qvfxxnpmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x96 + +# CHECK: qvfxxnpmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x96 + +# CHECK: qvlfcduxa 3, 9, 11 +0x7c 0x69 0x58 0xcf + +# CHECK: qvlfcdux 3, 9, 11 +0x7c 0x69 0x58 0xce + +# CHECK: qvlfcdxa 3, 10, 11 +0x7c 0x6a 0x58 0x8f + +# CHECK: qvlfcdx 3, 10, 11 +0x7c 0x6a 0x58 0x8e + +# CHECK: qvlfcsuxa 3, 9, 11 +0x7c 0x69 0x58 0x4f + +# CHECK: qvlfcsux 3, 9, 11 +0x7c 0x69 0x58 0x4e + +# CHECK: qvlfcsxa 3, 10, 11 +0x7c 0x6a 0x58 0x0f + +# CHECK: qvlfcsx 3, 10, 11 +0x7c 0x6a 0x58 0x0e + +# CHECK: qvlfduxa 3, 9, 11 +0x7c 0x69 0x5c 0xcf + +# CHECK: qvlfdux 3, 9, 11 +0x7c 0x69 0x5c 0xce + +# CHECK: qvlfdxa 3, 10, 11 +0x7c 0x6a 0x5c 0x8f + +# CHECK: qvlfdx 3, 10, 11 +0x7c 0x6a 0x5c 0x8e + +# CHECK: qvlfiwaxa 3, 10, 11 +0x7c 0x6a 0x5e 
0xcf + +# CHECK: qvlfiwax 3, 10, 11 +0x7c 0x6a 0x5e 0xce + +# CHECK: qvlfiwzxa 3, 10, 11 +0x7c 0x6a 0x5e 0x8f + +# CHECK: qvlfiwzx 3, 10, 11 +0x7c 0x6a 0x5e 0x8e + +# CHECK: qvlfsuxa 3, 9, 11 +0x7c 0x69 0x5c 0x4f + +# CHECK: qvlfsux 3, 9, 11 +0x7c 0x69 0x5c 0x4e + +# CHECK: qvlfsxa 3, 10, 11 +0x7c 0x6a 0x5c 0x0f + +# CHECK: qvlfsx 3, 10, 11 +0x7c 0x6a 0x5c 0x0e + +# CHECK: qvlpcldx 3, 10, 11 +0x7c 0x6a 0x5c 0x8c + +# CHECK: qvlpclsx 3, 10, 11 +0x7c 0x6a 0x5c 0x0c + +# CHECK: qvlpcrdx 3, 10, 11 +0x7c 0x6a 0x58 0x8c + +# CHECK: qvlpcrsx 3, 10, 11 +0x7c 0x6a 0x58 0x0c + +# CHECK: qvstfcduxa 2, 9, 11 +0x7c 0x49 0x59 0xcf + +# CHECK: qvstfcduxia 2, 9, 11 +0x7c 0x49 0x59 0xcb + +# CHECK: qvstfcduxi 2, 9, 11 +0x7c 0x49 0x59 0xca + +# CHECK: qvstfcdux 2, 9, 11 +0x7c 0x49 0x59 0xce + +# CHECK: qvstfcdxa 2, 10, 11 +0x7c 0x4a 0x59 0x8f + +# CHECK: qvstfcdxia 2, 10, 11 +0x7c 0x4a 0x59 0x8b + +# CHECK: qvstfcdxi 2, 10, 11 +0x7c 0x4a 0x59 0x8a + +# CHECK: qvstfcdx 2, 10, 11 +0x7c 0x4a 0x59 0x8e + +# CHECK: qvstfcsuxa 2, 9, 11 +0x7c 0x49 0x59 0x4f + +# CHECK: qvstfcsuxia 2, 9, 11 +0x7c 0x49 0x59 0x4b + +# CHECK: qvstfcsuxi 2, 9, 11 +0x7c 0x49 0x59 0x4a + +# CHECK: qvstfcsux 2, 9, 11 +0x7c 0x49 0x59 0x4e + +# CHECK: qvstfcsxa 2, 10, 11 +0x7c 0x4a 0x59 0x0f + +# CHECK: qvstfcsxia 2, 10, 11 +0x7c 0x4a 0x59 0x0b + +# CHECK: qvstfcsxi 2, 10, 11 +0x7c 0x4a 0x59 0x0a + +# CHECK: qvstfcsx 2, 10, 11 +0x7c 0x4a 0x59 0x0e + +# CHECK: qvstfduxa 2, 9, 11 +0x7c 0x49 0x5d 0xcf + +# CHECK: qvstfduxia 2, 9, 11 +0x7c 0x49 0x5d 0xcb + +# CHECK: qvstfduxi 2, 9, 11 +0x7c 0x49 0x5d 0xca + +# CHECK: qvstfdux 2, 9, 11 +0x7c 0x49 0x5d 0xce + +# CHECK: qvstfdxa 2, 10, 11 +0x7c 0x4a 0x5d 0x8f + +# CHECK: qvstfdxia 2, 10, 11 +0x7c 0x4a 0x5d 0x8b + +# CHECK: qvstfdxi 2, 10, 11 +0x7c 0x4a 0x5d 0x8a + +# CHECK: qvstfdx 2, 10, 11 +0x7c 0x4a 0x5d 0x8e + +# CHECK: qvstfiwxa 2, 10, 11 +0x7c 0x4a 0x5f 0x8f + +# CHECK: qvstfiwx 2, 10, 11 +0x7c 0x4a 0x5f 0x8e + +# CHECK: qvstfsuxa 2, 9, 11 +0x7c 0x49 0x5d 0x4f + +# 
CHECK: qvstfsuxia 2, 9, 11 +0x7c 0x49 0x5d 0x4b + +# CHECK: qvstfsuxi 2, 9, 11 +0x7c 0x49 0x5d 0x4a + +# CHECK: qvstfsux 2, 9, 11 +0x7c 0x49 0x5d 0x4e + +# CHECK: qvstfsxa 2, 10, 11 +0x7c 0x4a 0x5d 0x0f + +# CHECK: qvstfsxia 2, 10, 11 +0x7c 0x4a 0x5d 0x0b + +# CHECK: qvstfsxi 2, 10, 11 +0x7c 0x4a 0x5d 0x0a + +# CHECK: qvstfsx 2, 10, 11 +0x7c 0x4a 0x5d 0x0e + diff --git a/test/MC/PowerPC/qpx.s b/test/MC/PowerPC/qpx.s new file mode 100644 index 00000000000..6c92d715877 --- /dev/null +++ b/test/MC/PowerPC/qpx.s @@ -0,0 +1,251 @@ +# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s + +# FIXME: print qvflogical aliases. + +# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10] + qvfabs 3, 5 +# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a] + qvfadd 3, 4, 5 +# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a] + qvfadds 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 4 # encoding: [0x10,0x64,0x2a,0x08] + qvfandc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 1 # encoding: [0x10,0x64,0x28,0x88] + qvfand 3, 4, 5 +# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c] + qvfcfid 3, 5 +# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c] + qvfcfids 3, 5 +# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c] + qvfcfidu 3, 5 +# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c] + qvfcfidus 3, 5 +# CHECK: qvflogical 3, 3, 3, 0 # encoding: [0x10,0x63,0x18,0x08] + qvfclr 3 +# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10] + qvfcpsgn 3, 4, 5 +# CHECK: qvflogical 3, 4, 4, 5 # encoding: [0x10,0x64,0x22,0x88] + qvfctfb 3, 4 +# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c] + qvfctid 3, 5 +# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c] + qvfctidu 3, 5 +# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e] + qvfctiduz 3, 5 +# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e] + qvfctidz 3, 5 +# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c] + qvfctiw 3, 5 +# CHECK: qvfctiwu 3, 5 # encoding: 
[0x10,0x60,0x29,0x1c] + qvfctiwu 3, 5 +# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e] + qvfctiwuz 3, 5 +# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e] + qvfctiwz 3, 5 +# CHECK: qvflogical 3, 4, 5, 9 # encoding: [0x10,0x64,0x2c,0x88] + qvfequ 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08] + qvflogical 3, 4, 5, 12 +# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba] + qvfmadd 3, 4, 6, 5 +# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba] + qvfmadds 3, 4, 6, 5 +# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90] + qvfmr 3, 5 +# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8] + qvfmsub 3, 4, 6, 5 +# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8] + qvfmsubs 3, 4, 6, 5 +# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2] + qvfmul 3, 4, 6 +# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2] + qvfmuls 3, 4, 6 +# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10] + qvfnabs 3, 5 +# CHECK: qvflogical 3, 4, 5, 14 # encoding: [0x10,0x64,0x2f,0x08] + qvfnand 3, 4, 5 +# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50] + qvfneg 3, 5 +# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe] + qvfnmadd 3, 4, 6, 5 +# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe] + qvfnmadds 3, 4, 6, 5 +# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc] + qvfnmsub 3, 4, 6, 5 +# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc] + qvfnmsubs 3, 4, 6, 5 +# CHECK: qvflogical 3, 4, 5, 8 # encoding: [0x10,0x64,0x2c,0x08] + qvfnor 3, 4, 5 +# CHECK: qvflogical 3, 4, 4, 10 # encoding: [0x10,0x64,0x25,0x08] + qvfnot 3, 4 +# CHECK: qvflogical 3, 4, 5, 13 # encoding: [0x10,0x64,0x2e,0x88] + qvforc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 7 # encoding: [0x10,0x64,0x2b,0x88] + qvfor 3, 4, 5 +# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c] + qvfperm 3, 4, 5, 6 +# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30] + qvfre 3, 5 +# CHECK: qvfres 
3, 5 # encoding: [0x00,0x60,0x28,0x30] + qvfres 3, 5 +# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0] + qvfrim 3, 5 +# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10] + qvfrin 3, 5 +# CHECK: qvfrip 3, 5 # encoding: [0x10,0x60,0x2b,0x90] + qvfrip 3, 5 +# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50] + qvfriz 3, 5 +# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18] + qvfrsp 3, 5 +# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34] + qvfrsqrte 3, 5 +# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34] + qvfrsqrtes 3, 5 +# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae] + qvfsel 3, 4, 6, 5 +# CHECK: qvflogical 3, 3, 3, 15 # encoding: [0x10,0x63,0x1f,0x88] + qvfset 3 +# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28] + qvfsub 3, 4, 5 +# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28] + qvfsubs 3, 4, 5 +# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92] + qvfxmadd 3, 4, 6, 5 +# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92] + qvfxmadds 3, 4, 6, 5 +# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2] + qvfxmul 3, 4, 6 +# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2] + qvfxmuls 3, 4, 6 +# CHECK: qvflogical 3, 4, 5, 6 # encoding: [0x10,0x64,0x2b,0x08] + qvfxor 3, 4, 5 +# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86] + qvfxxcpnmadd 3, 4, 6, 5 +# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86] + qvfxxcpnmadds 3, 4, 6, 5 +# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82] + qvfxxmadd 3, 4, 6, 5 +# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82] + qvfxxmadds 3, 4, 6, 5 +# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96] + qvfxxnpmadd 3, 4, 6, 5 +# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96] + qvfxxnpmadds 3, 4, 6, 5 +# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf] + qvlfcduxa 3, 9, 11 +# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce] + qvlfcdux 3, 
9, 11 +# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f] + qvlfcdxa 3, 10, 11 +# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e] + qvlfcdx 3, 10, 11 +# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f] + qvlfcsuxa 3, 9, 11 +# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e] + qvlfcsux 3, 9, 11 +# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f] + qvlfcsxa 3, 10, 11 +# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e] + qvlfcsx 3, 10, 11 +# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf] + qvlfduxa 3, 9, 11 +# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce] + qvlfdux 3, 9, 11 +# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f] + qvlfdxa 3, 10, 11 +# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e] + qvlfdx 3, 10, 11 +# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf] + qvlfiwaxa 3, 10, 11 +# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce] + qvlfiwax 3, 10, 11 +# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f] + qvlfiwzxa 3, 10, 11 +# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e] + qvlfiwzx 3, 10, 11 +# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f] + qvlfsuxa 3, 9, 11 +# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e] + qvlfsux 3, 9, 11 +# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f] + qvlfsxa 3, 10, 11 +# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e] + qvlfsx 3, 10, 11 +# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c] + qvlpcldx 3, 10, 11 +# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c] + qvlpclsx 3, 10, 11 +# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c] + qvlpcrdx 3, 10, 11 +# CHECK: qvlpcrsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0c] + qvlpcrsx 3, 10, 11 +# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf] + qvstfcduxa 2, 9, 11 +# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb] + qvstfcduxia 2, 9, 
11 +# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca] + qvstfcduxi 2, 9, 11 +# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce] + qvstfcdux 2, 9, 11 +# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f] + qvstfcdxa 2, 10, 11 +# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b] + qvstfcdxia 2, 10, 11 +# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a] + qvstfcdxi 2, 10, 11 +# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e] + qvstfcdx 2, 10, 11 +# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f] + qvstfcsuxa 2, 9, 11 +# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b] + qvstfcsuxia 2, 9, 11 +# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a] + qvstfcsuxi 2, 9, 11 +# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e] + qvstfcsux 2, 9, 11 +# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f] + qvstfcsxa 2, 10, 11 +# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b] + qvstfcsxia 2, 10, 11 +# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a] + qvstfcsxi 2, 10, 11 +# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e] + qvstfcsx 2, 10, 11 +# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf] + qvstfduxa 2, 9, 11 +# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb] + qvstfduxia 2, 9, 11 +# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca] + qvstfduxi 2, 9, 11 +# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce] + qvstfdux 2, 9, 11 +# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f] + qvstfdxa 2, 10, 11 +# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b] + qvstfdxia 2, 10, 11 +# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a] + qvstfdxi 2, 10, 11 +# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e] + qvstfdx 2, 10, 11 +# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f] + qvstfiwxa 2, 10, 11 +# CHECK: qvstfiwx 2, 10, 11 # 
encoding: [0x7c,0x4a,0x5f,0x8e] + qvstfiwx 2, 10, 11 +# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f] + qvstfsuxa 2, 9, 11 +# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b] + qvstfsuxia 2, 9, 11 +# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a] + qvstfsuxi 2, 9, 11 +# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e] + qvstfsux 2, 9, 11 +# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f] + qvstfsxa 2, 10, 11 +# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b] + qvstfsxia 2, 10, 11 +# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a] + qvstfsxi 2, 10, 11 +# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e] + qvstfsx 2, 10, 11 +