[PowerPC] Add support for the QPX vector instruction set

This adds support for the QPX vector instruction set, which is used by the
enhanced A2 cores on the IBM BG/Q supercomputers. QPX vectors are 256 bits
wide, holding 4 double-precision floating-point values. Boolean values, modeled
here as <4 x i1>, are actually also represented as floating-point values
(essentially { -1, 1 } for { false, true }). QPX shares many features with
Altivec and VSX, but is distinct from both of them. One major difference is
that, instead of adding completely-separate vector registers, QPX vector
registers are extensions of the scalar floating-point registers (lane 0 is the
corresponding scalar floating-point value). The operations supported on QPX
vectors mirror those supported on the scalar floating-point values (with some
additional ones for permutations and logical/comparison operations).

I've been maintaining this support out-of-tree, as part of the bgclang project,
for several years. This is not the entire bgclang patch set, but is most of the
subset that can be cleanly integrated into LLVM proper at this time. Adding
this to the LLVM backend is part of my efforts to rebase bgclang to the current
LLVM trunk, but is independently useful (especially for codes that use LLVM as
a JIT in library form).

The assembler/disassembler test coverage is complete. The CodeGen test coverage
is not, but I've included some tests, and more will be added as follow-up work.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230413 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-02-25 01:06:45 +00:00
parent 76bdd01e0e
commit f8d179ba76
35 changed files with 4336 additions and 68 deletions

View File

@ -542,3 +542,180 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
}
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsics.
//
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
/// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
/// The GCC builtin name is formed by prepending "__builtin_qpx_" to
/// GCCIntSuffix; ret_types, param_types and properties are passed through
/// unchanged to the generic Intrinsic class.
class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
: GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
}
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Class Definitions.
//
/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
/// vector and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64. It carries the IntrReadArgMem property, i.e. it is
/// declared to only read memory, and only through its pointer argument.
class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64 permutation.
/// Unlike PowerPC_QPX_Load_Intrinsic this is declared IntrNoMem even though
/// it has a pointer parameter.
/// NOTE(review): presumably the result depends only on the address value and
/// not on the pointed-to memory -- confirm against the QPX ISA.
class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and stores a v4f64.
/// Operand order is (value, pointer): the v4f64 to store comes first and the
/// destination pointer second. There is no result. The intrinsic carries the
/// IntrReadWriteArgMem property, i.e. it is declared to access memory only
/// through its pointer argument.
class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[], [llvm_v4f64_ty, llvm_ptr_ty],
[IntrReadWriteArgMem]>;
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Add Instructions
def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
// Estimate Instructions
def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
// Multiply Instructions
def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
// Multiply-add instructions
def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
// Select Instruction
def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
// Permute Instruction
def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
// Convert and Round Instructions
def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
// Move Instructions
def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
// Compare Instructions
def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
// Load instructions
def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
// Store instructions
def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
// Logical and permutation formation
def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
[llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
}

View File

@ -132,6 +132,16 @@ static const MCPhysReg VSFRegs[64] = {
PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
static unsigned QFRegs[32] = {
PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
};
static const MCPhysReg CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@ -429,6 +439,7 @@ public:
bool isU8ImmX8() const { return Kind == Immediate &&
isUInt<8>(getImm()) &&
(getImm() & 7) == 0; }
/// Returns true when this operand is an immediate whose value fits in an
/// unsigned 12-bit field.
bool isU12Imm() const {
  if (Kind != Immediate)
    return false;
  return isUInt<12>(getImm());
}
bool isU16Imm() const {
switch (Kind) {
case Expression:
@ -564,6 +575,21 @@ public:
Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
}
/// Appends this operand to Inst as a register operand, mapping the parsed
/// register number to the matching entry of the QFRegs table.
void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const unsigned PhysReg = QFRegs[getReg()];
  Inst.addOperand(MCOperand::CreateReg(PhysReg));
}
/// Appends this operand to Inst as a register operand for the QSRC class.
/// The register is looked up in the same QFRegs table that the QFRC variant
/// uses.
void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const unsigned PhysReg = QFRegs[getReg()];
  Inst.addOperand(MCOperand::CreateReg(PhysReg));
}
/// Appends this operand to Inst as a register operand for the QBRC class.
/// The register is looked up in the same QFRegs table that the QFRC variant
/// uses.
void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const unsigned PhysReg = QFRegs[getReg()];
  Inst.addOperand(MCOperand::CreateReg(PhysReg));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));

View File

@ -164,6 +164,17 @@ static const unsigned G8Regs[] = {
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
// QPX vector registers QF0..QF31 in ascending order, so the array index is
// the encoded register number. Passed to decodeRegisterClass by
// DecodeQFRCRegisterClass.
static const unsigned QFRegs[] = {
PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
};
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned (&Regs)[N]) {
@ -235,6 +246,15 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
// Decodes a QFRC register operand by looking RegNo up in the QFRegs table
// via decodeRegisterClass. Address and Decoder are not used here.
static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
return decodeRegisterClass(Inst, RegNo, QFRegs);
}
// The QSRC and QBRC register classes are decoded with the same function (and
// therefore the same register table) as QFRC.
#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
@ -335,6 +355,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
if ((STI.getFeatureBits() & PPC::FeatureQPX) != 0) {
DecodeStatus result =
decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
MI.clear();
}
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}

View File

@ -34,7 +34,20 @@ FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
const char *RegName = getRegisterName(RegNo);
if (RegName[0] == 'q' /* QPX */) {
// The system toolchain on the BG/Q does not understand QPX register names
// in .cfi_* directives, so print the name of the floating-point
// subregister instead.
std::string RN(RegName);
RN[0] = 'f';
OS << RN;
return;
}
OS << RegName;
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@ -236,6 +249,13 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
// Prints operand OpNo of MI, a 12-bit unsigned immediate (0..4095), to O as
// a decimal number.
void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
  // Range-check the full-width immediate. Narrowing to unsigned short before
  // the check would wrap out-of-range values (e.g. 65536 -> 0) and let them
  // slip past the assertion.
  const int64_t Value = MI->getOperand(OpNo).getImm();
  assert(Value >= 0 && Value <= 4095 && "Invalid u12imm argument!");
  O << (unsigned short)Value;
}
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
@ -338,6 +358,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;

View File

@ -48,6 +48,7 @@ public:
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);

View File

@ -151,6 +151,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;

View File

@ -55,13 +55,17 @@ def RetCC_PPC : CallingConv<[
// only the ELFv2 ABI fully utilizes all these registers.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
// QPX vectors are returned in QF1 and QF2.
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
CCIfType<[v2f64, v2i64],
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
// No explicit register is specified for the AnyReg calling convention. The
@ -108,10 +112,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
CCIfType<[v2f64, v2i64],
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
//===----------------------------------------------------------------------===//
@ -144,6 +150,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
// QPX vectors that are stored in double precision need 32-byte alignment.
CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
@ -158,12 +167,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
def CC_PPC32_SVR4 : CallingConv<[
// QPX vectors mirror the scalar FP convention.
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
CCIfType<[v2f64, v2i64],
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
V10, V11, V12, V13]>>>,
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
VSH10, VSH11, VSH12, VSH13]>>,
VSH10, VSH11, VSH12, VSH13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;

View File

@ -83,7 +83,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
(STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
STI.getPlatformStackAlignment(), 0),
Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),

View File

@ -2293,6 +2293,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
if (PPCSubTarget->hasQPX())
return nullptr;
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
@ -2468,6 +2471,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@ -2711,6 +2716,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
SelectCCOp = PPC::SELECT_CC_QFRC;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
SelectCCOp = PPC::SELECT_CC_QSRC;
else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@ -3406,6 +3417,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC: {
@ -3713,6 +3727,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC:

File diff suppressed because it is too large Load Diff

View File

@ -283,6 +283,22 @@ namespace llvm {
/// of outputs.
XXSWAPD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
/// QVGPCI = This corresponds to the QPX qvgpci instruction.
QVGPCI,
/// QVALIGNI = This corresponds to the QPX qvaligni instruction.
QVALIGNI,
/// QVESPLATI = This corresponds to the QPX qvesplati instruction.
QVESPLATI,
/// QBFLT = Access the underlying QPX floating-point boolean
/// representation.
QBFLT,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@ -332,7 +348,11 @@ namespace llvm {
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
STXVD2X
STXVD2X,
/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
/// The 4xf32 load used for v4i1 constants.
QVLFSb
};
}
@ -381,6 +401,10 @@ namespace llvm {
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
}
class PPCTargetLowering : public TargetLowering {
@ -679,11 +703,15 @@ namespace llvm {
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,

View File

@ -562,6 +562,47 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
// Used for QPX
// X-form encoding with three 5-bit register fields and a 10-bit extended
// opcode. Layout after the primary opcode: FRT in bits 6-10, FRA in bits
// 11-15, FRB in bits 16-20, xo in bits 21-30; bit 31 is fixed at 0.
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
let Pattern = pattern;
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
// Variant of XForm_18 whose FRA field (bits 11-15) is fixed at 0, leaving
// FRT and FRB as the only register fields.
class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRA = 0;
}
// X-form variant with a 4-bit field `tttt` and a shortened 6-bit extended
// opcode. Layout after the primary opcode: FRT in bits 6-10, FRA in bits
// 11-15, FRB in bits 16-20, tttt in bits 21-24, xo in bits 25-30; bit 31 is
// fixed at 0.
class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
bits<4> tttt;
let Pattern = pattern;
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-24} = tttt;
let Inst{25-30} = xo;
let Inst{31} = 0;
}
class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@ -1215,6 +1256,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
// Used for QPX
// Variant of AForm_1 in which the FRA and FRC fields are both fixed at 0.
class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRA = 0;
let FRC = 0;
}
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@ -1439,6 +1488,49 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{22-31} = xo;
}
// Z23-Form (used by QPX)
// Three 5-bit register fields, a 2-bit index and an 8-bit extended opcode.
// Layout after the primary opcode: FRT in bits 6-10, FRA in bits 11-15, FRB
// in bits 16-20, idx in bits 21-22, xo in bits 23-30, RC in bit 31.
class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
bits<2> idx;
let Pattern = pattern;
bit RC = 0; // set by isDOT
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}
// Variant of Z23Form_1 whose FRB field (bits 16-20) is fixed at 0.
class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRB = 0;
}
// Z23-form variant with a single register field and a 12-bit immediate
// index. Layout after the primary opcode: FRT in bits 6-10, idx in bits
// 11-22, xo in bits 23-30, RC in bit 31.
class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<12> idx;
let Pattern = pattern;
bit RC = 0; // set by isDOT
let Inst{6-10} = FRT;
let Inst{11-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {

View File

@ -181,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
case PPC::QVLFDX:
case PPC::QVLFSXs:
case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@ -207,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
case PPC::QVSTFDX:
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@ -759,6 +765,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRs;
else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@ -844,6 +856,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@ -939,6 +969,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
DestReg),
FrameIdx));
SpillsVRS = true;
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
FrameIdx));
NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}

View File

@ -61,6 +61,27 @@ def tocentry32 : Operand<iPTR> {
let MIOperandInfo = (ops i32imm:$imm);
}
// Type profiles for the QPX-specific DAG nodes. In each profile, index 0 is
// the single result and indices 1..N are the operands.

// qvfperm: vector result; operands 1 and 2 have the result type; operand 3 is
// a vector.
def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
]>;
// qvgpci: vector result from a single integer operand.
def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisInt<1>
]>;
// qvaligni: vector result; operands 1 and 2 have the result type; operand 3 is
// an integer.
def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
]>;
// qvesplati: vector result; operand 1 has the result type; operand 2 is an
// integer.
def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
// qbflt: vector result from a vector operand; the two types are not required
// to match.
def SDT_PPCqbflt : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisVec<1>
]>;
// qvlfsb: vector result from a single pointer operand.
def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@ -127,6 +148,16 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// QPX DAG nodes. Each binds a PPCISD opcode to its SDT_PPCqv* type profile.
// Only PPCqvlfsb has node properties: it carries a chain and may load.
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
[SDNPHasChain, SDNPMayLoad]>;
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@ -464,6 +495,15 @@ def u6imm : Operand<i32> {
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
}
// Assembler operand class for 12-bit unsigned immediates: matched with
// isU12Imm and rendered with the generic addImmOperands.
def PPCU12ImmAsmOperand : AsmOperandClass {
let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
let RenderMethod = "addImmOperands";
}
// 12-bit unsigned immediate operand (i32): printed by printU12ImmOperand,
// parsed via PPCU12ImmAsmOperand and decoded by decodeUImmOperand<12>.
def u12imm : Operand<i32> {
let PrintMethod = "printU12ImmOperand";
let ParserMatchClass = PPCU12ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<12>";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addS16ImmOperands";
@ -680,6 +720,10 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
// Complementary predicates on the NoNaNsFPMath target option: NoNaNsFPMath
// holds when the option is set, NaNsFPMath when it is not.
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@ -2643,6 +2687,7 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;

File diff suppressed because it is too large Load Diff

View File

@ -275,6 +275,9 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::QFRCRegClassID:
case PPC::QSRCRegClassID:
case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:

View File

@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
// Each QFPR is layered over the scalar FPR passed as SubReg: it reuses that
// register's hardware encoding and exposes it as the sub_64 subregister.
class QFPR<FPR SubReg, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_64];
}
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@ -114,6 +121,12 @@ foreach Index = 0-31 in {
def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
}
// QPX Floating-point registers
// Defines QF0-QF31. QF<N> wraps scalar register F<N>, is printed as "q<N>",
// and uses N+32 for both entries of its DwarfRegNum list.
foreach Index = 0-31 in {
def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@ -303,6 +316,16 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
VF22, VF21, VF20)>;
def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
// For QPX
// QFRC holds v4f64 values; its register list is QF0-QF13 followed by QF31
// down to QF14.
def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
(sequence "QF%u", 31, 14))>;
// QSRC (v4f32) and QBRC (v4i1) reuse exactly the registers of QFRC.
def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
// These are actually stored as floating-point values where a positive
// number is true and anything else (including NaN) is false.
let Size = 256;
}
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,

View File

@ -37,6 +37,10 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
// Hidden command-line flag "qpx-stack-unaligned"; initSubtargetFeatures copies
// its value into the subtarget's IsQPXStackUnaligned member.
static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
cl::Hidden);
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
@ -90,6 +94,7 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
IsQPXStackUnaligned = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@ -126,8 +131,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
if (hasQPX() || isBGQ())
StackAlignment = 32;
IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();
// Determine endianness.
// FIXME: Part of the TargetMachine.

View File

@ -114,6 +114,11 @@ protected:
bool HasICBT;
bool HasInvariantFunctionDescriptors;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
/// of the stack.
bool IsQPXStackUnaligned;
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
@ -230,6 +235,14 @@ public:
return HasInvariantFunctionDescriptors;
}
/// isQPXStackUnaligned - True when the stack is not 32-byte aligned even
/// though QPX is enabled (returns the IsQPXStackUnaligned flag).
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
/// getPlatformStackAlignment - Stack alignment in bytes for this subtarget:
/// 32 when QPX or BG/Q is targeted and the stack has not been flagged as
/// unaligned, 16 otherwise.
unsigned getPlatformStackAlignment() const {
  const bool NeedsQPXAlignment = hasQPX() || isBGQ();
  return (NeedsQPXAlignment && !isQPXStackUnaligned()) ? 32 : 16;
}
const Triple &getTargetTriple() const { return TargetTriple; }
/// isDarwin - True if this is any darwin platform.

View File

@ -193,13 +193,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
}
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec())
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@ -276,6 +277,12 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == 0)
return 0;
return BaseT::getVectorInstrCost(Opcode, Val, Index);
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}

View File

@ -0,0 +1,33 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
define void @s452() nounwind {
entry:
br label %for.body4
for.body4: ; preds = %for.body4, %entry
%conv.4 = sitofp i32 undef to double
%conv.5 = sitofp i32 undef to double
%mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
%mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
%mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef
%add7.4 = fadd <2 x double> undef, %mul.4
store <2 x double> %add7.4, <2 x double>* undef, align 16
br i1 undef, label %for.end, label %for.body4
for.end: ; preds = %for.body4
unreachable
; CHECK-LABEL: @s452
; CHECK: lfiwax [[REG1:[0-9]+]],
; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
; FIXME: We could 'promote' this to a vector earlier and remove this splat.
; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
; CHECK: qvfmul
; CHECK: qvfadd
; CHECK: qvesplati {{[0-9]+}},
; FIXME: We can use qvstfcdx here instead of two stores.
; CHECK: stfd
; CHECK: stfd
}

View File

@ -0,0 +1,37 @@
; RUN: llc < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
%v1 = insertelement <4 x double> undef, double %f1, i32 0
%v2 = insertelement <4 x double> %v1, double %f2, i32 1
%v3 = insertelement <4 x double> %v2, double %f3, i32 2
%v4 = insertelement <4 x double> %v3, double %f4, i32 3
ret <4 x double> %v4
; CHECK-LABEL: @foo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}
define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
%v1 = insertelement <4 x float> undef, float %f1, i32 0
%v2 = insertelement <4 x float> %v1, float %f2, i32 1
%v3 = insertelement <4 x float> %v2, float %f3, i32 2
%v4 = insertelement <4 x float> %v3, float %f4, i32 3
ret <4 x float> %v4
; CHECK-LABEL: @goo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}

View File

@ -0,0 +1,21 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
declare <4 x double> @foo(<4 x double> %p)
define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
entry:
%v = call <4 x double> @foo(<4 x double> %p)
%w = call <4 x double> @foo(<4 x double> %q)
%x = fadd <4 x double> %v, %w
ret <4 x double> %x
; CHECK-LABEL: @bar
; CHECK: qvstfdx 2,
; CHECK: bl foo
; CHECK: qvstfdx 1,
; CHECK: qvlfdx 1,
; CHECK: bl foo
; CHECK: qvlfdx [[REG:[0-9]+]],
; CHECK: qvfadd 1, [[REG]], 1
}

View File

@ -0,0 +1,25 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
define <4 x double> @foo(<4 x double>* %p) {
entry:
%v = load <4 x double>* %p, align 8
ret <4 x double> %v
}
; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 31
; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr
define <4 x double> @bar(<4 x double>* %p) {
entry:
%v = load <4 x double>* %p, align 32
ret <4 x double> %v
}
; CHECK: @bar
; CHECK: qvlfdx

View File

@ -0,0 +1,194 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%r = fdiv <4 x double> %a, %x
ret <4 x double> %r
; CHECK-LABEL: @foo
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: blr
; CHECK-SAFE-LABEL: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%y = fpext <4 x float> %x to <4 x double>
%r = fdiv <4 x double> %a, %y
ret <4 x double> %r
; CHECK-LABEL: @foof
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: qvfmul
; CHECK: blr
; CHECK-SAFE-LABEL: @foof
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%y = fptrunc <4 x double> %x to <4 x float>
%r = fdiv <4 x float> %a, %y
ret <4 x float> %r
; CHECK-LABEL: @food
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfmul
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: qvfrsp
; CHECK: qvfmuls
; CHECK: blr
; CHECK-SAFE-LABEL: @food
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x
ret <4 x float> %r
; CHECK-LABEL: @goo
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: qvfmuls
; CHECK: blr
; CHECK-SAFE-LABEL: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
entry:
%r = fdiv <4 x double> %a, %b
ret <4 x double> %r
; CHECK-LABEL: @foo2
; CHECK: qvfre
; CHECK: qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfnmsub
; CHECK: qvfmadd
; CHECK: qvfmul
; CHECK: blr
; CHECK-SAFE-LABEL: @foo2
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
entry:
%r = fdiv <4 x float> %a, %b
ret <4 x float> %r
; CHECK-LABEL: @goo2
; CHECK: qvfres
; CHECK: qvfnmsubs
; CHECK: qvfmadds
; CHECK: qvfmuls
; CHECK: blr
; CHECK-SAFE-LABEL: @goo2
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x double> @foo3(<4 x double> %a) nounwind {
entry:
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r
; CHECK-LABEL: @foo3
; CHECK: qvfrsqrte
; CHECK: qvfmul
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadd instead of a qvfnmsub
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfcmpeq
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmadd
; CHECK-DAG: qvfmul
; CHECK-DAG: qvfmul
; CHECK: qvfsel
; CHECK: blr
; CHECK-SAFE-LABEL: @foo3
; CHECK-SAFE: fsqrt
; CHECK-SAFE: blr
}
define <4 x float> @goo3(<4 x float> %a) nounwind {
entry:
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
; CHECK-LABEL: @goo3
; CHECK: qvfrsqrtes
; CHECK: qvfmuls
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
; a qvfmadds instead of a qvfnmsubs
; CHECK-DAG: qvfmadds
; CHECK-DAG: qvfcmpeq
; CHECK-DAG: qvfmadds
; CHECK-DAG: qvfmuls
; CHECK-DAG: qvfmuls
; CHECK: qvfsel
; CHECK: blr
; CHECK-SAFE-LABEL: @goo3
; CHECK-SAFE: fsqrts
; CHECK-SAFE: blr
}

View File

@ -0,0 +1,109 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define <4 x float> @test1(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test1:
; CHECK: qvfrim 1, 1
; CHECK-FM: test1:
; CHECK-FM: qvfrim 1, 1
}
declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test2(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test2:
; CHECK: qvfrim 1, 1
; CHECK-FM: test2:
; CHECK-FM: qvfrim 1, 1
}
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test3(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test3:
; CHECK-NOT: qvfrin
; CHECK-FM: test3:
; CHECK-FM-NOT: qvfrin
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test4(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test4:
; CHECK-NOT: qvfrin
; CHECK-FM: test4:
; CHECK-FM-NOT: qvfrin
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test5(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test5:
; CHECK: qvfrip 1, 1
; CHECK-FM: test5:
; CHECK-FM: qvfrip 1, 1
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test6(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test6:
; CHECK: qvfrip 1, 1
; CHECK-FM: test6:
; CHECK-FM: qvfrip 1, 1
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test9(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test9:
; CHECK: qvfriz 1, 1
; CHECK-FM: test9:
; CHECK-FM: qvfriz 1, 1
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test10(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test10:
; CHECK: qvfriz 1, 1
; CHECK-FM: test10:
; CHECK-FM: qvfriz 1, 1
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone

View File

@ -0,0 +1,25 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
define <4 x float> @foo(<4 x float>* %p) {
entry:
%v = load <4 x float>* %p, align 4
ret <4 x float> %v
}
; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 15
; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr
define <4 x float> @bar(<4 x float>* %p) {
entry:
%v = load <4 x float>* %p, align 16
ret <4 x float> %v
}
; CHECK: @bar
; CHECK: qvlfsx

View File

@ -0,0 +1,143 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %r
; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
ret <4 x float> %r
; CHECK-LABEL: @test2
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}
; Test: AND of a <4 x i1> argument with a constant <4 x i1> vector.
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v
; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; NOTE(review): the next two lines carry no FileCheck directive prefix, so
; FileCheck ignores them -- confirm whether they were meant to be verified.
; qvflogical 1, 1, [[REG]], 1
; blr
}
define <4 x i1> @test4(<4 x i1> %a) nounwind {
entry:
%q = load <4 x i1>* @Q, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v
; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvflogical 1, 1, [[REG4]], 1
; CHECK: blr
}
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void
; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q
; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
ret <3 x float> %r
; CHECK-LABEL: @test9
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

View File

@ -0,0 +1,24 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
define void @foo(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 4
ret void
}
; CHECK: @foo
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: blr
define void @bar(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 16
ret void
}
; CHECK: @bar
; CHECK: qvstfsx

View File

@ -0,0 +1,151 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
ret <4 x double> %r
; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}
define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
ret <4 x double> %r
; CHECK-LABEL: @test2
; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}
; Test: AND of a <4 x i1> argument with a constant <4 x i1> vector.
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v
; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; NOTE(review): the next two lines carry no FileCheck directive prefix, so
; FileCheck ignores them -- confirm whether they were meant to be verified.
; qvflogical 1, 1, [[REG]], 1
; blr
}
define <4 x i1> @test4(<4 x i1> %a) nounwind {
entry:
%q = load <4 x i1>* @Q, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v
; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvflogical 1, 1, [[REG4]], 1
; CHECK: blr
}
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void
; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q
; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
ret <3 x double> %r
; CHECK-LABEL: @test9
; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

View File

@ -0,0 +1,24 @@
; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
define void @foo(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 8
ret void
}
; CHECK: @foo
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: blr
define void @bar(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 32
ret void
}
; CHECK: @bar
; CHECK: qvstfdx

View File

@ -0,0 +1,64 @@
; RUN: llc < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define <4 x double> @foo(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 32
ret <4 x double> %r
; CHECK: qvlfdx
; CHECK: blr
}
define <4 x double> @bar(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 16
%s = load <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
; CHECK: qvlpcldx
; CHECK: qvlfdx
; CHECK: qvfperm
; CHECK: blr
}
define <4 x double> @bar1(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 16
%s = load <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar2(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar3(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar4(<4 x double>* %a) {
entry:
%r = load <4 x double>* %a, align 8
%b = getelementptr <4 x double>* %a, i32 1
%s = load <4 x double>* %b, align 8
%c = getelementptr <4 x double>* %b, i32 1
%t = load <4 x double>* %c, align 8
%u = fadd <4 x double> %r, %s
%v = fadd <4 x double> %u, %t
ret <4 x double> %v
}

View File

@ -8,7 +8,6 @@ entry:
br i1 false, label %loop2_start, label %if.end5
; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc
; CHECK: xxlor
loop2_start: ; preds = %loop2_start, %entry
br i1 undef, label %loop2_start, label %if.then.i31

View File

@ -0,0 +1,383 @@
# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s
# CHECK: qvfabs 3, 5
0x10 0x60 0x2a 0x10
# CHECK: qvfadd 3, 4, 5
0x10 0x64 0x28 0x2a
# CHECK: qvfadds 3, 4, 5
0x00 0x64 0x28 0x2a
# FIXME: decode as qvfandc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 4
0x10 0x64 0x2a 0x08
# FIXME: decode as qvfand 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 1
0x10 0x64 0x28 0x88
# CHECK: qvfcfid 3, 5
0x10 0x60 0x2e 0x9c
# CHECK: qvfcfids 3, 5
0x00 0x60 0x2e 0x9c
# CHECK: qvfcfidu 3, 5
0x10 0x60 0x2f 0x9c
# CHECK: qvfcfidus 3, 5
0x00 0x60 0x2f 0x9c
# FIXME: decode as qvfclr 3
# CHECK: qvflogical 3, 3, 3, 0
0x10 0x63 0x18 0x08
# CHECK: qvfcpsgn 3, 4, 5
0x10 0x64 0x28 0x10
# FIXME: decode as qvfctfb 3, 4
# CHECK: qvflogical 3, 4, 4, 5
0x10 0x64 0x22 0x88
# CHECK: qvfctid 3, 5
0x10 0x60 0x2e 0x5c
# CHECK: qvfctidu 3, 5
0x10 0x60 0x2f 0x5c
# CHECK: qvfctiduz 3, 5
0x10 0x60 0x2f 0x5e
# CHECK: qvfctidz 3, 5
0x10 0x60 0x2e 0x5e
# CHECK: qvfctiw 3, 5
0x10 0x60 0x28 0x1c
# CHECK: qvfctiwu 3, 5
0x10 0x60 0x29 0x1c
# CHECK: qvfctiwuz 3, 5
0x10 0x60 0x29 0x1e
# CHECK: qvfctiwz 3, 5
0x10 0x60 0x28 0x1e
# FIXME: decode as qvfequ 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 9
0x10 0x64 0x2c 0x88
# CHECK: qvflogical 3, 4, 5, 12
0x10 0x64 0x2e 0x08
# CHECK: qvfmadd 3, 4, 6, 5
0x10 0x64 0x29 0xba
# CHECK: qvfmadds 3, 4, 6, 5
0x00 0x64 0x29 0xba
# CHECK: qvfmr 3, 5
0x10 0x60 0x28 0x90
# CHECK: qvfmsub 3, 4, 6, 5
0x10 0x64 0x29 0xb8
# CHECK: qvfmsubs 3, 4, 6, 5
0x00 0x64 0x29 0xb8
# CHECK: qvfmul 3, 4, 6
0x10 0x64 0x01 0xb2
# CHECK: qvfmuls 3, 4, 6
0x00 0x64 0x01 0xb2
# CHECK: qvfnabs 3, 5
0x10 0x60 0x29 0x10
# FIXME: decode as qvfnand 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 14
0x10 0x64 0x2f 0x08
# CHECK: qvfneg 3, 5
0x10 0x60 0x28 0x50
# CHECK: qvfnmadd 3, 4, 6, 5
0x10 0x64 0x29 0xbe
# CHECK: qvfnmadds 3, 4, 6, 5
0x00 0x64 0x29 0xbe
# CHECK: qvfnmsub 3, 4, 6, 5
0x10 0x64 0x29 0xbc
# CHECK: qvfnmsubs 3, 4, 6, 5
0x00 0x64 0x29 0xbc
# FIXME: decode as qvfnor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 8
0x10 0x64 0x2c 0x08
# FIXME: decode as qvfnot 3, 4
# CHECK: qvflogical 3, 4, 4, 10
0x10 0x64 0x25 0x08
# FIXME: decode as qvforc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 13
0x10 0x64 0x2e 0x88
# FIXME: decode as qvfor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 7
0x10 0x64 0x2b 0x88
# CHECK: qvfperm 3, 4, 5, 6
0x10 0x64 0x29 0x8c
# CHECK: qvfre 3, 5
0x10 0x60 0x28 0x30
# CHECK: qvfres 3, 5
0x00 0x60 0x28 0x30
# CHECK: qvfrim 3, 5
0x10 0x60 0x2b 0xd0
# CHECK: qvfrin 3, 5
0x10 0x60 0x2b 0x10
# CHECK: qvfrip 3, 5
0x10 0x60 0x2b 0x90
# CHECK: qvfriz 3, 5
0x10 0x60 0x2b 0x50
# CHECK: qvfrsp 3, 5
0x10 0x60 0x28 0x18
# CHECK: qvfrsqrte 3, 5
0x10 0x60 0x28 0x34
# CHECK: qvfrsqrtes 3, 5
0x00 0x60 0x28 0x34
# CHECK: qvfsel 3, 4, 6, 5
0x10 0x64 0x29 0xae
# FIXME: decode as qvfset 3
# CHECK: qvflogical 3, 3, 3, 15
0x10 0x63 0x1f 0x88
# CHECK: qvfsub 3, 4, 5
0x10 0x64 0x28 0x28
# CHECK: qvfsubs 3, 4, 5
0x00 0x64 0x28 0x28
# CHECK: qvfxmadd 3, 4, 6, 5
0x10 0x64 0x29 0x92
# CHECK: qvfxmadds 3, 4, 6, 5
0x00 0x64 0x29 0x92
# CHECK: qvfxmul 3, 4, 6
0x10 0x64 0x01 0xa2
# CHECK: qvfxmuls 3, 4, 6
0x00 0x64 0x01 0xa2
# FIXME: decode as qvfxor 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 6
0x10 0x64 0x2b 0x08
# CHECK: qvfxxcpnmadd 3, 4, 6, 5
0x10 0x64 0x29 0x86
# CHECK: qvfxxcpnmadds 3, 4, 6, 5
0x00 0x64 0x29 0x86
# CHECK: qvfxxmadd 3, 4, 6, 5
0x10 0x64 0x29 0x82
# CHECK: qvfxxmadds 3, 4, 6, 5
0x00 0x64 0x29 0x82
# CHECK: qvfxxnpmadd 3, 4, 6, 5
0x10 0x64 0x29 0x96
# CHECK: qvfxxnpmadds 3, 4, 6, 5
0x00 0x64 0x29 0x96
# CHECK: qvlfcduxa 3, 9, 11
0x7c 0x69 0x58 0xcf
# CHECK: qvlfcdux 3, 9, 11
0x7c 0x69 0x58 0xce
# CHECK: qvlfcdxa 3, 10, 11
0x7c 0x6a 0x58 0x8f
# CHECK: qvlfcdx 3, 10, 11
0x7c 0x6a 0x58 0x8e
# CHECK: qvlfcsuxa 3, 9, 11
0x7c 0x69 0x58 0x4f
# CHECK: qvlfcsux 3, 9, 11
0x7c 0x69 0x58 0x4e
# CHECK: qvlfcsxa 3, 10, 11
0x7c 0x6a 0x58 0x0f
# CHECK: qvlfcsx 3, 10, 11
0x7c 0x6a 0x58 0x0e
# CHECK: qvlfduxa 3, 9, 11
0x7c 0x69 0x5c 0xcf
# CHECK: qvlfdux 3, 9, 11
0x7c 0x69 0x5c 0xce
# CHECK: qvlfdxa 3, 10, 11
0x7c 0x6a 0x5c 0x8f
# CHECK: qvlfdx 3, 10, 11
0x7c 0x6a 0x5c 0x8e
# CHECK: qvlfiwaxa 3, 10, 11
0x7c 0x6a 0x5e 0xcf
# CHECK: qvlfiwax 3, 10, 11
0x7c 0x6a 0x5e 0xce
# CHECK: qvlfiwzxa 3, 10, 11
0x7c 0x6a 0x5e 0x8f
# CHECK: qvlfiwzx 3, 10, 11
0x7c 0x6a 0x5e 0x8e
# CHECK: qvlfsuxa 3, 9, 11
0x7c 0x69 0x5c 0x4f
# CHECK: qvlfsux 3, 9, 11
0x7c 0x69 0x5c 0x4e
# CHECK: qvlfsxa 3, 10, 11
0x7c 0x6a 0x5c 0x0f
# CHECK: qvlfsx 3, 10, 11
0x7c 0x6a 0x5c 0x0e
# CHECK: qvlpcldx 3, 10, 11
0x7c 0x6a 0x5c 0x8c
# CHECK: qvlpclsx 3, 10, 11
0x7c 0x6a 0x5c 0x0c
# CHECK: qvlpcrdx 3, 10, 11
0x7c 0x6a 0x58 0x8c
# CHECK: qvlpcrsx 3, 10, 11
0x7c 0x6a 0x58 0x0c
# CHECK: qvstfcduxa 2, 9, 11
0x7c 0x49 0x59 0xcf
# CHECK: qvstfcduxia 2, 9, 11
0x7c 0x49 0x59 0xcb
# CHECK: qvstfcduxi 2, 9, 11
0x7c 0x49 0x59 0xca
# CHECK: qvstfcdux 2, 9, 11
0x7c 0x49 0x59 0xce
# CHECK: qvstfcdxa 2, 10, 11
0x7c 0x4a 0x59 0x8f
# CHECK: qvstfcdxia 2, 10, 11
0x7c 0x4a 0x59 0x8b
# CHECK: qvstfcdxi 2, 10, 11
0x7c 0x4a 0x59 0x8a
# CHECK: qvstfcdx 2, 10, 11
0x7c 0x4a 0x59 0x8e
# CHECK: qvstfcsuxa 2, 9, 11
0x7c 0x49 0x59 0x4f
# CHECK: qvstfcsuxia 2, 9, 11
0x7c 0x49 0x59 0x4b
# CHECK: qvstfcsuxi 2, 9, 11
0x7c 0x49 0x59 0x4a
# CHECK: qvstfcsux 2, 9, 11
0x7c 0x49 0x59 0x4e
# CHECK: qvstfcsxa 2, 10, 11
0x7c 0x4a 0x59 0x0f
# CHECK: qvstfcsxia 2, 10, 11
0x7c 0x4a 0x59 0x0b
# CHECK: qvstfcsxi 2, 10, 11
0x7c 0x4a 0x59 0x0a
# CHECK: qvstfcsx 2, 10, 11
0x7c 0x4a 0x59 0x0e
# CHECK: qvstfduxa 2, 9, 11
0x7c 0x49 0x5d 0xcf
# CHECK: qvstfduxia 2, 9, 11
0x7c 0x49 0x5d 0xcb
# CHECK: qvstfduxi 2, 9, 11
0x7c 0x49 0x5d 0xca
# CHECK: qvstfdux 2, 9, 11
0x7c 0x49 0x5d 0xce
# CHECK: qvstfdxa 2, 10, 11
0x7c 0x4a 0x5d 0x8f
# CHECK: qvstfdxia 2, 10, 11
0x7c 0x4a 0x5d 0x8b
# CHECK: qvstfdxi 2, 10, 11
0x7c 0x4a 0x5d 0x8a
# CHECK: qvstfdx 2, 10, 11
0x7c 0x4a 0x5d 0x8e
# CHECK: qvstfiwxa 2, 10, 11
0x7c 0x4a 0x5f 0x8f
# CHECK: qvstfiwx 2, 10, 11
0x7c 0x4a 0x5f 0x8e
# CHECK: qvstfsuxa 2, 9, 11
0x7c 0x49 0x5d 0x4f
# CHECK: qvstfsuxia 2, 9, 11
0x7c 0x49 0x5d 0x4b
# CHECK: qvstfsuxi 2, 9, 11
0x7c 0x49 0x5d 0x4a
# CHECK: qvstfsux 2, 9, 11
0x7c 0x49 0x5d 0x4e
# CHECK: qvstfsxa 2, 10, 11
0x7c 0x4a 0x5d 0x0f
# CHECK: qvstfsxia 2, 10, 11
0x7c 0x4a 0x5d 0x0b
# CHECK: qvstfsxi 2, 10, 11
0x7c 0x4a 0x5d 0x0a
# CHECK: qvstfsx 2, 10, 11
0x7c 0x4a 0x5d 0x0e

251
test/MC/PowerPC/qpx.s Normal file
View File

@ -0,0 +1,251 @@
# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s
# FIXME: print qvflogical aliases.
#
# Assembler round-trip test: this file is assembled by llvm-mc with
# --show-encoding and the output is verified by FileCheck. Every input
# instruction is preceded by the directive giving the text llvm-mc is
# expected to print back together with the four encoding bytes.
# Directives are matched in order, so keep each directive directly paired
# with its input line when adding or moving cases.
#
# Alias handling (the subject of the FIXME above): the logical aliases are
# accepted as input but the expected output is the generic qvflogical form
# with a fourth immediate operand. Mapping exercised in this section
# (alias -> immediate):
#   qvfclr  -> 0  (destination repeated as both sources)
#   qvfand  -> 1      qvfandc -> 4
#   qvfctfb -> 5  (source repeated)
#   qvfxor  -> 6      qvfor   -> 7
#   qvfnor  -> 8      qvfequ  -> 9
#   qvfnot  -> 10 (source repeated)
#   qvforc  -> 13     qvfnand -> 14
#   qvfset  -> 15 (destination repeated as both sources)
#
# For every X / Xs mnemonic pair in this section (qvfadd/qvfadds,
# qvfre/qvfres, ...) the two expected encodings differ only in the first
# byte: 0x10 for X and 0x00 for Xs.
# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10]
qvfabs 3, 5
# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a]
qvfadd 3, 4, 5
# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a]
qvfadds 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 4 # encoding: [0x10,0x64,0x2a,0x08]
qvfandc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 1 # encoding: [0x10,0x64,0x28,0x88]
qvfand 3, 4, 5
# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c]
qvfcfid 3, 5
# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c]
qvfcfids 3, 5
# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c]
qvfcfidu 3, 5
# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c]
qvfcfidus 3, 5
# CHECK: qvflogical 3, 3, 3, 0 # encoding: [0x10,0x63,0x18,0x08]
qvfclr 3
# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10]
qvfcpsgn 3, 4, 5
# CHECK: qvflogical 3, 4, 4, 5 # encoding: [0x10,0x64,0x22,0x88]
qvfctfb 3, 4
# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c]
qvfctid 3, 5
# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c]
qvfctidu 3, 5
# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e]
qvfctiduz 3, 5
# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e]
qvfctidz 3, 5
# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c]
qvfctiw 3, 5
# CHECK: qvfctiwu 3, 5 # encoding: [0x10,0x60,0x29,0x1c]
qvfctiwu 3, 5
# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e]
qvfctiwuz 3, 5
# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e]
qvfctiwz 3, 5
# CHECK: qvflogical 3, 4, 5, 9 # encoding: [0x10,0x64,0x2c,0x88]
qvfequ 3, 4, 5
# The generic form written directly, with an explicit immediate.
# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08]
qvflogical 3, 4, 5, 12
# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba]
qvfmadd 3, 4, 6, 5
# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba]
qvfmadds 3, 4, 6, 5
# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90]
qvfmr 3, 5
# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8]
qvfmsub 3, 4, 6, 5
# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8]
qvfmsubs 3, 4, 6, 5
# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2]
qvfmul 3, 4, 6
# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2]
qvfmuls 3, 4, 6
# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10]
qvfnabs 3, 5
# CHECK: qvflogical 3, 4, 5, 14 # encoding: [0x10,0x64,0x2f,0x08]
qvfnand 3, 4, 5
# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50]
qvfneg 3, 5
# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe]
qvfnmadd 3, 4, 6, 5
# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe]
qvfnmadds 3, 4, 6, 5
# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc]
qvfnmsub 3, 4, 6, 5
# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc]
qvfnmsubs 3, 4, 6, 5
# CHECK: qvflogical 3, 4, 5, 8 # encoding: [0x10,0x64,0x2c,0x08]
qvfnor 3, 4, 5
# CHECK: qvflogical 3, 4, 4, 10 # encoding: [0x10,0x64,0x25,0x08]
qvfnot 3, 4
# CHECK: qvflogical 3, 4, 5, 13 # encoding: [0x10,0x64,0x2e,0x88]
qvforc 3, 4, 5
# CHECK: qvflogical 3, 4, 5, 7 # encoding: [0x10,0x64,0x2b,0x88]
qvfor 3, 4, 5
# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c]
qvfperm 3, 4, 5, 6
# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30]
qvfre 3, 5
# CHECK: qvfres 3, 5 # encoding: [0x00,0x60,0x28,0x30]
qvfres 3, 5
# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0]
qvfrim 3, 5
# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10]
qvfrin 3, 5
# CHECK: qvfrip 3, 5 # encoding: [0x10,0x60,0x2b,0x90]
qvfrip 3, 5
# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50]
qvfriz 3, 5
# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18]
qvfrsp 3, 5
# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34]
qvfrsqrte 3, 5
# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34]
qvfrsqrtes 3, 5
# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae]
qvfsel 3, 4, 6, 5
# CHECK: qvflogical 3, 3, 3, 15 # encoding: [0x10,0x63,0x1f,0x88]
qvfset 3
# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28]
qvfsub 3, 4, 5
# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28]
qvfsubs 3, 4, 5
# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92]
qvfxmadd 3, 4, 6, 5
# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92]
qvfxmadds 3, 4, 6, 5
# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2]
qvfxmul 3, 4, 6
# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2]
qvfxmuls 3, 4, 6
# CHECK: qvflogical 3, 4, 5, 6 # encoding: [0x10,0x64,0x2b,0x08]
qvfxor 3, 4, 5
# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86]
qvfxxcpnmadd 3, 4, 6, 5
# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86]
qvfxxcpnmadds 3, 4, 6, 5
# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82]
qvfxxmadd 3, 4, 6, 5
# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82]
qvfxxmadds 3, 4, 6, 5
# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96]
qvfxxnpmadd 3, 4, 6, 5
# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96]
qvfxxnpmadds 3, 4, 6, 5
# Indexed-load cases (mnemonics starting with qvlf / qvlpc). Each directive
# gives the expected print-back and encoding for the input line after it.
# Patterns visible in the expected encodings below:
#   - every encoding starts with 0x7c;
#   - a mnemonic with a trailing 'a' differs from its counterpart only in
#     the low bit of the last byte (e.g. 0xce -> 0xcf);
#   - mnemonics containing "ux" are exercised with register 9 as the second
#     operand, all others with register 10;
#   - the qvlpc* mnemonics appear without an 'a' variant in this test.
# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf]
qvlfcduxa 3, 9, 11
# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce]
qvlfcdux 3, 9, 11
# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f]
qvlfcdxa 3, 10, 11
# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e]
qvlfcdx 3, 10, 11
# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f]
qvlfcsuxa 3, 9, 11
# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e]
qvlfcsux 3, 9, 11
# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f]
qvlfcsxa 3, 10, 11
# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e]
qvlfcsx 3, 10, 11
# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf]
qvlfduxa 3, 9, 11
# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce]
qvlfdux 3, 9, 11
# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f]
qvlfdxa 3, 10, 11
# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e]
qvlfdx 3, 10, 11
# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf]
qvlfiwaxa 3, 10, 11
# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce]
qvlfiwax 3, 10, 11
# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f]
qvlfiwzxa 3, 10, 11
# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e]
qvlfiwzx 3, 10, 11
# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f]
qvlfsuxa 3, 9, 11
# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e]
qvlfsux 3, 9, 11
# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f]
qvlfsxa 3, 10, 11
# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e]
qvlfsx 3, 10, 11
# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c]
qvlpcldx 3, 10, 11
# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c]
qvlpclsx 3, 10, 11
# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c]
qvlpcrdx 3, 10, 11
# CHECK: qvlpcrsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0c]
qvlpcrsx 3, 10, 11
# Indexed-store cases (mnemonics starting with qvstf). Each directive gives
# the expected print-back and encoding for the input line after it.
# Patterns visible in the expected encodings below:
#   - every encoding starts with 0x7c;
#   - a trailing 'a' sets the low bit of the last byte (0xce -> 0xcf);
#   - an 'i' after the 'x' clears the 0x04 bit of the last byte
#     (0xce -> 0xca, and 0xcf -> 0xcb for the combined "xia" form);
#   - mnemonics containing "ux" are exercised with register 9 as the second
#     operand, all others with register 10;
#   - qvstfiwx appears only in its plain and 'a' forms in this test.
# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf]
qvstfcduxa 2, 9, 11
# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb]
qvstfcduxia 2, 9, 11
# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca]
qvstfcduxi 2, 9, 11
# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce]
qvstfcdux 2, 9, 11
# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f]
qvstfcdxa 2, 10, 11
# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b]
qvstfcdxia 2, 10, 11
# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a]
qvstfcdxi 2, 10, 11
# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e]
qvstfcdx 2, 10, 11
# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f]
qvstfcsuxa 2, 9, 11
# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b]
qvstfcsuxia 2, 9, 11
# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a]
qvstfcsuxi 2, 9, 11
# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e]
qvstfcsux 2, 9, 11
# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f]
qvstfcsxa 2, 10, 11
# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b]
qvstfcsxia 2, 10, 11
# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a]
qvstfcsxi 2, 10, 11
# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e]
qvstfcsx 2, 10, 11
# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf]
qvstfduxa 2, 9, 11
# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb]
qvstfduxia 2, 9, 11
# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca]
qvstfduxi 2, 9, 11
# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce]
qvstfdux 2, 9, 11
# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f]
qvstfdxa 2, 10, 11
# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b]
qvstfdxia 2, 10, 11
# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a]
qvstfdxi 2, 10, 11
# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e]
qvstfdx 2, 10, 11
# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f]
qvstfiwxa 2, 10, 11
# CHECK: qvstfiwx 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8e]
qvstfiwx 2, 10, 11
# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f]
qvstfsuxa 2, 9, 11
# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b]
qvstfsuxia 2, 9, 11
# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a]
qvstfsuxi 2, 9, 11
# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e]
qvstfsux 2, 9, 11
# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f]
qvstfsxa 2, 10, 11
# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b]
qvstfsxia 2, 10, 11
# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a]
qvstfsxi 2, 10, 11
# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e]
qvstfsx 2, 10, 11