[PowerPC] Initial support for the VSX instruction set

VSX is an ISA extension supported on the POWER7 and later cores that enhances
floating-point vector and scalar capabilities. Among other things, this adds
<2 x double> support and generally helps to reduce register pressure.

The interesting part of this ISA feature is the register configuration: there
are 64 new 128-bit vector registers, the first 32 of which are super-registers of the
existing 32 scalar floating-point registers, and the second 32 of which overlap
with the 32 Altivec vector registers. This makes things like vector insertion
and extraction tricky: this can be free but only if we force a restriction to
the right register subclass when needed. A new "minipass" PPCVSXCopy takes care
of this (although it could do a more-optimal job of it; see the comment about
unnecessary copies below).

Please note that, currently, VSX is not enabled by default for any target
because it is not yet ready for that. The assembler and disassembler
are fully implemented and tested. However:

 - CodeGen support causes miscompiles; test-suite runtime failures:
      MultiSource/Benchmarks/FreeBench/distray/distray
      MultiSource/Benchmarks/McCat/08-main/main
      MultiSource/Benchmarks/Olden/voronoi/voronoi
      MultiSource/Benchmarks/mafft/pairlocalalign
      MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4
      SingleSource/Benchmarks/CoyoteBench/almabench
      SingleSource/Benchmarks/Misc/matmul_f64_4x4

 - The lowering currently falls back to using Altivec instructions far more
   than it should. Worse, there are some things that are scalarized through the
   stack that shouldn't be.

 - A lot of unnecessary copies make it past the optimizers, and this needs to
   be fixed.

 - Many more regression tests are needed.

Normally, I'd fix these things prior to committing, but there are some
students and other contributors who would like to work on this, and so it makes
sense to move this development process upstream where it can be subject to the
regular code-review procedures.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203768 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2014-03-13 07:58:58 +00:00
parent 79c15b23c9
commit ab849adec4
22 changed files with 2071 additions and 23 deletions

View File

@ -95,6 +95,25 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31 PPC::V28, PPC::V29, PPC::V30, PPC::V31
}; };
// Lookup table from a VSX register number to its register enumerator:
// indices 0-31 select VSL0-VSL31 and indices 32-63 select VSH0-VSH31.
static unsigned VSRegs[64] = {
  // Indices 0-31.
  PPC::VSL0,  PPC::VSL1,  PPC::VSL2,  PPC::VSL3,
  PPC::VSL4,  PPC::VSL5,  PPC::VSL6,  PPC::VSL7,
  PPC::VSL8,  PPC::VSL9,  PPC::VSL10, PPC::VSL11,
  PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
  PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
  PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
  PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
  PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,

  // Indices 32-63.
  PPC::VSH0,  PPC::VSH1,  PPC::VSH2,  PPC::VSH3,
  PPC::VSH4,  PPC::VSH5,  PPC::VSH6,  PPC::VSH7,
  PPC::VSH8,  PPC::VSH9,  PPC::VSH10, PPC::VSH11,
  PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
  PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
  PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
  PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
};
static unsigned CRBITRegs[32] = { static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@ -345,6 +364,11 @@ public:
return (unsigned) Imm.Val; return (unsigned) Imm.Val;
} }
/// Return the operand's value as a VSX register number. Asserts that
/// isVSRegNumber() holds for this operand.
unsigned getVSReg() const {
  assert(isVSRegNumber() && "Invalid access!");
  const unsigned RegNum = (unsigned) Imm.Val;
  return RegNum;
}
unsigned getCCReg() const { unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!"); assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal); return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@ -362,6 +386,7 @@ public:
bool isToken() const { return Kind == Token; } bool isToken() const { return Kind == Token; }
bool isImm() const { return Kind == Immediate || Kind == Expression; } bool isImm() const { return Kind == Immediate || Kind == Expression; }
/// True when the operand is an immediate that fits in two unsigned bits.
bool isU2Imm() const {
  if (Kind != Immediate)
    return false;
  return isUInt<2>(getImm());
}
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
@ -382,6 +407,7 @@ public:
(Kind == Immediate && isInt<16>(getImm()) && (Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); } (getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
/// True when the operand is an immediate that fits in six unsigned bits,
/// i.e. a value usable as a VSX register number.
bool isVSRegNumber() const {
  if (Kind != Immediate)
    return false;
  return isUInt<6>(getImm());
}
bool isCCRegNumber() const { return (Kind == Expression bool isCCRegNumber() const { return (Kind == Expression
&& isUInt<3>(getExprCRVal())) || && isUInt<3>(getExprCRVal())) ||
(Kind == Immediate (Kind == Immediate
@ -448,6 +474,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
} }
/// Append this operand to Inst as a register operand, translating the VSX
/// register number through the VSRegs table. N must be 1.
void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const unsigned MappedReg = VSRegs[getVSReg()];
  Inst.addOperand(MCOperand::CreateReg(MappedReg));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!"); assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));

View File

@ -92,6 +92,26 @@ static const unsigned VRegs[] = {
PPC::V28, PPC::V29, PPC::V30, PPC::V31 PPC::V28, PPC::V29, PPC::V30, PPC::V31
}; };
// Decoder table for VSX registers: entries 0-31 are VSL0-VSL31 and
// entries 32-63 are VSH0-VSH31.
static const unsigned VSRegs[] = {
  // Entries 0-31.
  PPC::VSL0,  PPC::VSL1,  PPC::VSL2,  PPC::VSL3,
  PPC::VSL4,  PPC::VSL5,  PPC::VSL6,  PPC::VSL7,
  PPC::VSL8,  PPC::VSL9,  PPC::VSL10, PPC::VSL11,
  PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
  PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
  PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
  PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
  PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,

  // Entries 32-63.
  PPC::VSH0,  PPC::VSH1,  PPC::VSH2,  PPC::VSH3,
  PPC::VSH4,  PPC::VSH5,  PPC::VSH6,  PPC::VSH7,
  PPC::VSH8,  PPC::VSH9,  PPC::VSH10, PPC::VSH11,
  PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
  PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
  PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
  PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
};
static const unsigned GPRegs[] = { static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3, PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@ -163,6 +183,12 @@ static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VRegs); return decodeRegisterClass(Inst, RegNo, VRegs);
} }
// Decode a VSRC register operand by forwarding RegNo to decodeRegisterClass
// with the VSRegs table. Address and Decoder are not used here.
static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                            uint64_t Address,
                                            const void *Decoder) {
  DecodeStatus Result = decodeRegisterClass(Inst, RegNo, VSRegs);
  return Result;
}
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address, uint64_t Address,
const void *Decoder) { const void *Decoder) {

View File

@ -199,6 +199,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O); printOperand(MI, OpNo+1, O);
} }
// Print operand OpNo of MI as an unsigned integer, asserting that it fits in
// two bits (i.e. is at most 3).
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
                                       raw_ostream &O) {
  const unsigned int Field = MI->getOperand(OpNo).getImm();
  assert(Field <= 3 && "Invalid u2imm argument!");
  // Field is already an unsigned int, so it streams as a number without a cast.
  O << Field;
}
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) { raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm(); int Value = MI->getOperand(OpNo).getImm();
@ -316,7 +323,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) { switch (RegName[0]) {
case 'r': case 'r':
case 'f': case 'f':
case 'v': return RegName + 1; case 'v':
if (RegName[1] == 's')
return RegName + 2;
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2; case 'c': if (RegName[1] == 'r') return RegName + 2;
} }

View File

@ -43,7 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo, void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = 0); raw_ostream &O, const char *Modifier = 0);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);

View File

@ -35,6 +35,7 @@ namespace llvm {
FunctionPass *createPPCCTRLoopsVerify(); FunctionPass *createPPCCTRLoopsVerify();
#endif #endif
FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,

View File

@ -90,7 +90,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">; "Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions">; "Enable VSX instructions",
[FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">; "Treat mftb as deprecated">;

View File

@ -129,7 +129,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) { switch (RegName[0]) {
case 'r': case 'r':
case 'f': case 'f':
case 'v': return RegName + 1; case 'v':
if (RegName[1] == 's')
return RegName + 2;
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2; case 'c': if (RegName[1] == 'r') return RegName + 2;
} }

View File

@ -572,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS; Opc = PPC::FCMPUS;
} else { } else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
Opc = PPC::FCMPUD; Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
} }
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
} }
@ -640,7 +640,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified // getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code, // vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). // only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
bool HasVSX) {
switch (CC) { switch (CC) {
case ISD::SETEQ: case ISD::SETEQ:
case ISD::SETUEQ: case ISD::SETUEQ:
@ -654,7 +655,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
return PPC::VCMPEQUW; return PPC::VCMPEQUW;
// v4f32 != v4f32 could be translate to unordered not equal // v4f32 != v4f32 could be translate to unordered not equal
else if (VecVT == MVT::v4f32) else if (VecVT == MVT::v4f32)
return PPC::VCMPEQFP; return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPEQDP;
break; break;
case ISD::SETLT: case ISD::SETLT:
case ISD::SETGT: case ISD::SETGT:
@ -667,7 +670,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
else if (VecVT == MVT::v4i32) else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW; return PPC::VCMPGTSW;
else if (VecVT == MVT::v4f32) else if (VecVT == MVT::v4f32)
return PPC::VCMPGTFP; return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPGTDP;
break; break;
case ISD::SETULT: case ISD::SETULT:
case ISD::SETUGT: case ISD::SETUGT:
@ -682,17 +687,23 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
break; break;
case ISD::SETOEQ: case ISD::SETOEQ:
if (VecVT == MVT::v4f32) if (VecVT == MVT::v4f32)
return PPC::VCMPEQFP; return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPEQDP;
break; break;
case ISD::SETOLT: case ISD::SETOLT:
case ISD::SETOGT: case ISD::SETOGT:
case ISD::SETOLE: case ISD::SETOLE:
if (VecVT == MVT::v4f32) if (VecVT == MVT::v4f32)
return PPC::VCMPGTFP; return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPGTDP;
break; break;
case ISD::SETOGE: case ISD::SETOGE:
if (VecVT == MVT::v4f32) if (VecVT == MVT::v4f32)
return PPC::VCMPGEFP; return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPGEDP;
break; break;
default: default:
break; break;
@ -703,7 +714,7 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
// getVCmpEQInst: return the equal compare instruction for the specified vector // getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec // type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32). // types (v16i8, v8i16, v4i32, and v4f32).
static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
switch (VecVT) { switch (VecVT) {
case MVT::v16i8: case MVT::v16i8:
return PPC::VCMPEQUB; return PPC::VCMPEQUB;
@ -712,13 +723,14 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
case MVT::v4i32: case MVT::v4i32:
return PPC::VCMPEQUW; return PPC::VCMPEQUW;
case MVT::v4f32: case MVT::v4f32:
return PPC::VCMPEQFP; return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
case MVT::v2f64:
return PPC::XVCMPEQDP;
default: default:
llvm_unreachable("Invalid integer vector compare condition"); llvm_unreachable("Invalid integer vector compare condition");
} }
} }
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N); SDLoc dl(N);
unsigned Imm; unsigned Imm;
@ -808,7 +820,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (LHS.getValueType().isVector()) { if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType(); EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
unsigned int VCmpInst = getVCmpInst(VT, CC); unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX());
switch (CC) { switch (CC) {
case ISD::SETEQ: case ISD::SETEQ:
@ -839,7 +851,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else { } else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT); unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
} }
@ -848,7 +860,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETOLE: case ISD::SETOLE:
case ISD::SETULE: { case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT); unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
} }

View File

@ -505,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal);
if (TM.Options.UnsafeFPMath) { if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
} }
@ -532,6 +532,40 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
if (Subtarget->hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
addRegisterClass(MVT::f64, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
}
} }
if (Subtarget->has64BitSupport()) { if (Subtarget->has64BitSupport()) {
@ -2094,6 +2128,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v8i16: case MVT::v8i16:
case MVT::v4i32: case MVT::v4i32:
case MVT::v4f32: case MVT::v4f32:
case MVT::v2f64:
RC = &PPC::VRRCRegClass; RC = &PPC::VRRCRegClass;
break; break;
} }
@ -2340,7 +2375,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
ObjectVT==MVT::v2f64) {
if (isVarArg) { if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT, MinReservedArea += CalculateStackSlotSize(ObjectVT,
@ -2497,6 +2533,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v4i32: case MVT::v4i32:
case MVT::v8i16: case MVT::v8i16:
case MVT::v16i8: case MVT::v16i8:
case MVT::v2f64:
// Note that vector arguments in registers don't reserve stack space, // Note that vector arguments in registers don't reserve stack space,
// except in varargs functions. // except in varargs functions.
if (VR_idx != Num_VR_Regs) { if (VR_idx != Num_VR_Regs) {
@ -2959,7 +2996,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
EVT ArgVT = Outs[i].VT; EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary. // Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
ArgVT==MVT::v2f64) {
if (!isVarArg && !isPPC64) { if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec // Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need. // parameters; handle those later so we know how much padding we need.
@ -4143,6 +4181,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v4i32: case MVT::v4i32:
case MVT::v8i16: case MVT::v8i16:
case MVT::v16i8: case MVT::v16i8:
case MVT::v2f64:
if (isVarArg) { if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers // These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in // when within range. The Darwin PPC ABI doc claims they also go in
@ -6917,7 +6956,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) || (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
(VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
(VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function: // For the reciprocal, we need to find the zero of the function:
@ -6979,7 +7019,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
(VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function: // For the reciprocal sqrt, we need to find the zero of the function:
@ -7891,6 +7932,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() && if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() && TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
// FIXME: Update this for VSX!
(VT == MVT::v16i8 || VT == MVT::v8i16 || (VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) && VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) { LD->getAlignment() < ABIAlignment) {
@ -8314,6 +8356,9 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
} }
} else if (Constraint == "wc") { // individual CR bits. } else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass; return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "ws") {
return C_RegisterClass; // VSX registers.
} }
return TargetLowering::getConstraintType(Constraint); return TargetLowering::getConstraintType(Constraint);
} }
@ -8335,6 +8380,13 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
// Look at the constraint type. // Look at the constraint type.
if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
return CW_Register; // an individual CR bit. return CW_Register; // an individual CR bit.
else if ((StringRef(constraint) == "wa" ||
StringRef(constraint) == "wd" ||
StringRef(constraint) == "wf") &&
type->isVectorTy())
return CW_Register;
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
switch (*constraint) { switch (*constraint) {
default: default:
@ -8393,6 +8445,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
} }
} else if (Constraint == "wc") { // an individual CR bit. } else if (Constraint == "wc") { // an individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass); return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if (Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "ws") {
return std::make_pair(0U, &PPC::VSRCRegClass);
} }
std::pair<unsigned, const TargetRegisterClass*> R = std::pair<unsigned, const TargetRegisterClass*> R =

View File

@ -593,6 +593,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0; let A = 0;
} }
// XX*-Form (VSX)
// XX1Form - encoding with a 6-bit XT field, two 5-bit fields (A, B) and a
// 10-bit extended opcode (xo).
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<5> A;
bits<5> B;
let Pattern = pattern;
// XT is split: its low five bits go in bits 6-10, its high bit in bit 31.
let Inst{6-10} = XT{4-0};
let Inst{11-15} = A;
let Inst{16-20} = B;
let Inst{21-30} = xo;
let Inst{31} = XT{5};
}
// XX2Form - encoding with two 6-bit register fields (XT, XB) and a 9-bit
// extended opcode (xo). Bits 11-15 are fixed to zero.
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XB;
let Pattern = pattern;
// Each 6-bit field is split: low five bits in the main field, high bit in
// bit 30 (XB) or bit 31 (XT).
let Inst{6-10} = XT{4-0};
let Inst{11-15} = 0;
let Inst{16-20} = XB{4-0};
let Inst{21-29} = xo;
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// XX2Form_1 - variant of the two-operand form with a 3-bit CR field in
// bits 6-8 in place of XT. Bits 9-15 and bit 31 are fixed to zero.
class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> CR;
bits<6> XB;
let Pattern = pattern;
let Inst{6-8} = CR;
let Inst{9-15} = 0;
// XB is split: low five bits in bits 16-20, high bit in bit 30.
let Inst{16-20} = XB{4-0};
let Inst{21-29} = xo;
let Inst{30} = XB{5};
let Inst{31} = 0;
}
// XX2Form_2 - two-register form (XT, XB) with an additional 2-bit field D in
// bits 14-15. Bits 11-13 are fixed to zero.
class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XB;
bits<2> D;
let Pattern = pattern;
let Inst{6-10} = XT{4-0};
let Inst{11-13} = 0;
let Inst{14-15} = D;
let Inst{16-20} = XB{4-0};
let Inst{21-29} = xo;
// High bits of the 6-bit register fields.
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// XX3Form - encoding with three 6-bit register fields (XT, XA, XB) and an
// 8-bit extended opcode (xo).
class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XA;
bits<6> XB;
let Pattern = pattern;
// Low five bits of each register field.
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21-28} = xo;
// High bit of each register field.
let Inst{29} = XA{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// XX3Form_1 - variant of the three-operand form with a 3-bit CR field in
// bits 6-8 in place of XT. Bits 9-10 and bit 31 are fixed to zero.
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> CR;
bits<6> XA;
bits<6> XB;
let Pattern = pattern;
let Inst{6-8} = CR;
let Inst{9-10} = 0;
let Inst{11-15} = XA{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21-28} = xo;
// High bits of the 6-bit register fields.
let Inst{29} = XA{5};
let Inst{30} = XB{5};
let Inst{31} = 0;
}
// XX3Form_2 - three-register form (XT, XA, XB) with a 2-bit field D in
// bits 22-23 and a 5-bit extended opcode (xo) in bits 24-28. Bit 21 is zero.
class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XA;
bits<6> XB;
bits<2> D;
let Pattern = pattern;
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21} = 0;
let Inst{22-23} = D;
let Inst{24-28} = xo;
// High bits of the 6-bit register fields.
let Inst{29} = XA{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// XX3Form_Rc - three-register form (XT, XA, XB) with an RC bit in bit 21 and
// a 7-bit extended opcode (xo) in bits 22-28.
class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XA;
bits<6> XB;
let Pattern = pattern;
bit RC = 0; // set by isDOT
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21} = RC;
let Inst{22-28} = xo;
// High bits of the 6-bit register fields.
let Inst{29} = XA{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// XX4Form - encoding with four 6-bit register fields (XT, XA, XB, XC) and a
// 2-bit extended opcode (xo) in bits 26-27.
class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<6> XA;
bits<6> XB;
bits<6> XC;
let Pattern = pattern;
// Low five bits of each register field.
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21-25} = XC{4-0};
let Inst{26-27} = xo;
// High bit of each register field.
let Inst{28} = XC{5};
let Inst{29} = XA{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
// DCB_Form - Form X instruction, used for dcb* instructions. // DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern> InstrItinClass itin, list<dag> pattern>

View File

@ -165,6 +165,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CR: case PPC::RESTORE_CR:
case PPC::RESTORE_CRBIT: case PPC::RESTORE_CRBIT:
case PPC::LVX: case PPC::LVX:
case PPC::LXVD2X:
case PPC::RESTORE_VRSAVE: case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the // Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0). // offset which defaults to 0).
@ -190,6 +191,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CR: case PPC::SPILL_CR:
case PPC::SPILL_CRBIT: case PPC::SPILL_CRBIT:
case PPC::STVX: case PPC::STVX:
case PPC::STXVD2X:
case PPC::SPILL_VRSAVE: case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the // Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0). // offset which defaults to 0).
@ -655,6 +657,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL, MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const { bool KillSrc) const {
// We can end up with self copies and similar things as a result of VSX copy
// legalization. Promote (or just ignore) them here.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (PPC::F8RCRegClass.contains(DestReg) &&
PPC::VSLRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
if (SrcReg == SuperReg)
return;
DestReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(DestReg) &&
PPC::VSHRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
if (SrcReg == SuperReg)
return;
DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
PPC::VSLRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
if (DestReg == SuperReg)
return;
SrcReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(SrcReg) &&
PPC::VSHRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
if (DestReg == SuperReg)
return;
SrcReg = SuperReg;
}
unsigned Opc; unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR; Opc = PPC::OR;
@ -666,6 +709,14 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::MCRF; Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR; Opc = PPC::VOR;
else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
// FIXME: There are really two different ways this can be done, and we
// should pick the better one depending on the situation:
// 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
// issue in VSU pipeline 0.
// 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
// can go to either pipeline.
Opc = PPC::XXLOR;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR; Opc = PPC::CROR;
else else
@ -731,6 +782,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)), getKillRegState(isKill)),
FrameIdx)); FrameIdx));
NonRI = true; NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() && assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin"); "VRSAVE only needs spill/restore on Darwin");
@ -818,6 +875,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx)); FrameIdx));
NonRI = true; NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() && assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin"); "VRSAVE only needs spill/restore on Darwin");
@ -1485,6 +1546,144 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
} }
} }
#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-vsx-copy"
// Forward declaration of the PPCVSXCopy pass initializer, made visible here so
// that the PPCVSXCopy constructor can call it with the global PassRegistry.
namespace llvm {
void initializePPCVSXCopyPass(PassRegistry&);
}
namespace {
// PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
// copies into subregister copies with other restrictions.
struct PPCVSXCopy : public MachineFunctionPass {
static char ID;
PPCVSXCopy() : MachineFunctionPass(ID) {
initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
}
const PPCTargetMachine *TM;
const PPCInstrInfo *TII;
bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
MachineRegisterInfo &MRI) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
return RC->hasSubClassEq(MRI.getRegClass(Reg));
} else if (RC->contains(Reg)) {
return true;
}
return false;
}
bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
}
bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
}
bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
}
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
MachineInstr *MI = I;
if (!MI->isFullCopy())
continue;
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
if ( IsVSReg(DstMO.getReg(), MRI) &&
!IsVSReg(SrcMO.getReg(), MRI)) {
// This is a copy *to* a VSX register from a non-VSX register.
Changed = true;
const TargetRegisterClass *SrcRC =
IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
IsVRReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
// of the high bits.
.addOperand(SrcMO)
.addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
PPC::sub_64);
// The source of the original copy is now the new virtual register.
SrcMO.setReg(NewVReg);
} else if (!IsVSReg(DstMO.getReg(), MRI) &&
IsVSReg(SrcMO.getReg(), MRI)) {
// This is a copy *from* a VSX register to a non-VSX register.
Changed = true;
const TargetRegisterClass *DstRC =
IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(DstMO.getReg(), MRI) ||
IsVRReg(DstMO.getReg(), MRI)) &&
"Unknown destination for a VSX copy");
// Copy the VSX value into a new VSX register of the correct subclass.
unsigned NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg)
.addOperand(SrcMO);
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
PPC::sub_64);
}
}
return Changed;
}
public:
virtual bool runOnMachineFunction(MachineFunction &MF) {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
bool Changed = false;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
return Changed;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
// Register PPCVSXCopy under DEBUG_TYPE ("ppc-vsx-copy" at this point in the
// file) with the description "PowerPC VSX Copy Legalization".
INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
// Definition of the static pass ID that the constructor hands to
// MachineFunctionPass.
char PPCVSXCopy::ID = 0;
// Factory: returns a newly allocated PPCVSXCopy pass.
FunctionPass*
llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
#undef DEBUG_TYPE #undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret" #define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns"); STATISTIC(NumBCLR, "Number of early conditional returns");

View File

@ -412,6 +412,14 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand; let ParserMatchClass = PPCRegCRRCAsmOperand;
} }
// Assembler operand class "U2Imm": accepted when the parser's isU2Imm
// predicate holds and rendered through addImmOperands.
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
let RenderMethod = "addImmOperands";
}
// i32 immediate operand matched by PPCU2ImmAsmOperand and printed with
// printU2ImmOperand. (Presumably a 2-bit unsigned immediate, going by the
// name; the range check itself lives in isU2Imm, outside this file.)
def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
}
def PPCS5ImmAsmOperand : AsmOperandClass { def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands"; let RenderMethod = "addImmOperands";
@ -2431,6 +2439,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
include "PPCInstrAltivec.td" include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td" include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
def crnot : OutPatFrag<(ops node:$in), def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>; (CRNOR $in, $in)>;

View File

@ -0,0 +1,679 @@
//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the VSX extension to the PowerPC instruction set.
//
//===----------------------------------------------------------------------===//
// Assembler operand class for VSX registers; a token matches when the
// parser's isVSRegNumber predicate accepts it.
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
// Register operand over the VSRC register class, used by the VSX instruction
// definitions in this file.
def vsrc : RegisterOperand<VSRC> {
let ParserMatchClass = PPCRegVSRCAsmOperand;
}
// XX3Form_Rcr - Defines a pair of XX3Form_Rc instructions sharing one
// BaseName:
//   NAME  - mnemonic `asmbase`, carrying the supplied selection pattern.
//   NAMEo - mnemonic `asmbase` followed by ".", marked isDOT, with Defs = [CR6]
//           and no selection pattern.
// `asmstr` is the operand string appended after the mnemonic.
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>;
let Defs = [CR6] in
def o : XX3Form_Rc<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[]>, isDOT;
}
}
// Predicate that holds when PPCSubTarget.hasVSX() is true; it guards every
// VSX definition and pattern in the `let Predicates = [HasVSX]` scope below.
def HasVSX : Predicate<"PPCSubTarget.hasVSX()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, canFoldAsLoad = 1 in {
  // The VSX loads use XX1Form, the same instruction format as the VSX stores
  // (STXSDX/STXVD2X/STXVW4X). The generic XForm_1 format only carries a 5-bit
  // target register field, so the extension bit of the 6-bit VSX register
  // number would be dropped and vs32-vs63 could not be encoded as load
  // targets.

  // lxsdx: selected for an indexed (xoaddr) f64 load.
  def LXSDX : XX1Form<31, 588,
                      (outs vsrc:$XT), (ins memrr:$src),
                      "lxsdx $XT, $src", IIC_LdStLFD,
                      [(set f64:$XT, (load xoaddr:$src))]>;

  // lxvd2x: selected for an indexed (xoaddr) v2f64 load.
  def LXVD2X : XX1Form<31, 844,
                       (outs vsrc:$XT), (ins memrr:$src),
                       "lxvd2x $XT, $src", IIC_LdStLFD,
                       [(set v2f64:$XT, (load xoaddr:$src))]>;

  // lxvdsx: no selection pattern yet.
  def LXVDSX : XX1Form<31, 332,
                       (outs vsrc:$XT), (ins memrr:$src),
                       "lxvdsx $XT, $src", IIC_LdStLFD, []>;
  // TODO: match load + splat to lxvdsx.

  // lxvw4x: selected for an indexed (xoaddr) v4f32 load.
  def LXVW4X : XX1Form<31, 780,
                       (outs vsrc:$XT), (ins memrr:$src),
                       "lxvw4x $XT, $src", IIC_LdStLFD,
                       [(set v4f32:$XT, (load xoaddr:$src))]>;
}
// Store indexed instructions
// All three are XX1Form with primary opcode 31, take the value in $XT and an
// indexed (memrr / xoaddr) address in $dst, and produce no outputs. They are
// selected for stores of f64, v2f64 and v4f32 respectively.
let mayStore = 1 in {
def STXSDX : XX1Form<31, 716,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxsdx $XT, $dst", IIC_LdStSTFD,
[(store f64:$XT, xoaddr:$dst)]>;
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
[(store v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
[(store v4f32:$XT, xoaddr:$dst)]>;
}
// Add/Mul Instructions
// Marked isCommutable, so $XA and $XB may be swapped. The xs* forms are
// selected for f64 fadd/fmul, the xv*dp forms for v2f64 and the xv*sp forms
// for v4f32.
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xsadddp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
def XSMULDP : XX3Form<60, 48,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xsmuldp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
def XVADDDP : XX3Form<60, 96,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvadddp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
def XVADDSP : XX3Form<60, 64,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvaddsp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
def XVMULDP : XX3Form<60, 112,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmuldp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
def XVMULSP : XX3Form<60, 80,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmulsp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
}
// Subtract Instructions
// Selected for fsub on f64 (xssubdp), v2f64 (xvsubdp) and v4f32 (xvsubsp);
// the result is $XA - $XB. Unlike add/mul these are not marked commutable.
def XSSUBDP : XX3Form<60, 40,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xssubdp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
def XVSUBDP : XX3Form<60, 104,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubdp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
def XVSUBSP : XX3Form<60, 72,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubsp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
// FMA Instructions
// Every FMA takes an extra input $XTi that is tied to the result register
// ($XTi = $XT) and excluded from the encoding (NoEncode), so the assembly
// string only names $XT, $XA and $XB. Only the "A" variants carry selection
// patterns, in which $XTi is the addend: fma($XA, $XB, +/-$XTi), optionally
// negated as a whole. The "M" variants have empty pattern lists and are not
// selected from patterns in this file.
def XSMADDADP : XX3Form<60, 33,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSMADDMDP : XX3Form<60, 41,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// TODO: Select between these based first on whether one of the operands has
// no further uses. We probably want to do this after scheduling but before
// register allocation.
def XSMSUBADP : XX3Form<60, 49,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSMSUBMDP : XX3Form<60, 57,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSNMADDADP : XX3Form<60, 161,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSNMADDMDP : XX3Form<60, 169,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSNMSUBADP : XX3Form<60, 177,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XSNMSUBMDP : XX3Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Vector (v2f64 "dp" and v4f32 "sp") counterparts of the scalar FMAs above.
def XVMADDADP : XX3Form<60, 97,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMADDMDP : XX3Form<60, 105,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMADDASP : XX3Form<60, 65,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddasp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMADDMSP : XX3Form<60, 73,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMSUBADP : XX3Form<60, 113,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMSUBMDP : XX3Form<60, 121,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMSUBASP : XX3Form<60, 81,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubasp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVMSUBMSP : XX3Form<60, 89,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMADDADP : XX3Form<60, 225,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMADDMDP : XX3Form<60, 233,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMADDASP : XX3Form<60, 193,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMADDMSP : XX3Form<60, 201,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMSUBADP : XX3Form<60, 241,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMSUBMDP : XX3Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMSUBASP : XX3Form<60, 209,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XVNMSUBMSP : XX3Form<60, 217,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Division Instructions
// Besides fdiv this group holds fsqrt and the PPCfre / PPCfrsqrte estimate
// nodes, each in scalar f64, v2f64 and v4f32 flavours where defined. The
// xst* / xvt* instructions write a condition-register field ($crD) instead of
// a VSX register and have no selection patterns.
def XSDIVDP : XX3Form<60, 56,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xsdivdp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
def XSSQRTDP : XX2Form<60, 75,
(outs vsrc:$XT), (ins vsrc:$XB),
"xssqrtdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fsqrt f64:$XB))]>;
def XSREDP : XX2Form<60, 90,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsredp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfre f64:$XB))]>;
def XSRSQRTEDP : XX2Form<60, 74,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrsqrtedp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
def XSTDIVDP : XX3Form_1<60, 61,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xstdivdp $crD, $XA, $XB", IIC_VecFP, []>;
def XSTSQRTDP : XX2Form_1<60, 106,
(outs crrc:$crD), (ins vsrc:$XB),
"xstsqrtdp $crD, $XB", IIC_VecFP, []>;
def XVDIVDP : XX3Form<60, 120,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivdp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
def XVDIVSP : XX3Form<60, 88,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivsp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
def XVSQRTDP : XX2Form<60, 203,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
def XVSQRTSP : XX2Form<60, 139,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
def XVTDIVDP : XX3Form_1<60, 125,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xvtdivdp $crD, $XA, $XB", IIC_VecFP, []>;
def XVTDIVSP : XX3Form_1<60, 93,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xvtdivsp $crD, $XA, $XB", IIC_VecFP, []>;
def XVTSQRTDP : XX2Form_1<60, 234,
(outs crrc:$crD), (ins vsrc:$XB),
"xvtsqrtdp $crD, $XB", IIC_VecFP, []>;
def XVTSQRTSP : XX2Form_1<60, 170,
(outs crrc:$crD), (ins vsrc:$XB),
"xvtsqrtsp $crD, $XB", IIC_VecFP, []>;
def XVREDP : XX2Form<60, 218,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvredp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
def XVRESP : XX2Form<60, 154,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvresp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
def XVRSQRTEDP : XX2Form<60, 202,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrsqrtedp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
def XVRSQRTESP : XX2Form<60, 138,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrsqrtesp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
// Compare Instructions
// The scalar compares write a condition-register field ($crD). The vector
// compares write a VSX register and are instantiated through XX3Form_Rcr, so
// each also gets a dotted "o" variant that defines CR6. None of them has a
// selection pattern yet.
def XSCMPODP : XX3Form_1<60, 43,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>;
def XSCMPUDP : XX3Form_1<60, 35,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPEQSP : XX3Form_Rcr<60, 67,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPGEDP : XX3Form_Rcr<60, 115,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPGESP : XX3Form_Rcr<60, 83,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPGTDP : XX3Form_Rcr<60, 107,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPGTSP : XX3Form_Rcr<60, 75,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
// Move Instructions
// Sign-manipulation operations: fabs, fneg(fabs), fneg and fcopysign, for
// f64, v2f64 and v4f32. Note the operand order in the copysign patterns:
// they match (fcopysign $XB, $XA), i.e. $XB is the first fcopysign operand
// and $XA the second.
def XSABSDP : XX2Form<60, 345,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsabsdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fabs f64:$XB))]>;
def XSNABSDP : XX2Form<60, 361,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsnabsdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fabs f64:$XB)))]>;
def XSNEGDP : XX2Form<60, 377,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsnegdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fneg f64:$XB))]>;
def XSCPSGNDP : XX3Form<60, 176,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xscpsgndp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
def XVABSDP : XX2Form<60, 473,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvabsdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fabs v2f64:$XB))]>;
def XVABSSP : XX2Form<60, 409,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvabssp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fabs v4f32:$XB))]>;
def XVCPSGNDP : XX3Form<60, 240,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
[(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
def XVCPSGNSP : XX3Form<60, 208,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
[(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
def XVNABSDP : XX2Form<60, 489,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnabsdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
def XVNABSSP : XX2Form<60, 425,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnabssp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
def XVNEGDP : XX2Form<60, 505,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnegdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fneg v2f64:$XB))]>;
def XVNEGSP : XX2Form<60, 441,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnegsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg v4f32:$XB))]>;
// Conversion Instructions
// All are single-source XX2Form instructions ($XB -> $XT) with empty pattern
// lists, so none of them is selected from DAG patterns in this file yet.
def XSCVDPSP : XX2Form<60, 265,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvdpsp $XT, $XB", IIC_VecFP, []>;
def XSCVDPSXDS : XX2Form<60, 344,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP, []>;
def XSCVDPSXWS : XX2Form<60, 88,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP, []>;
def XSCVDPUXDS : XX2Form<60, 328,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP, []>;
def XSCVDPUXWS : XX2Form<60, 72,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP, []>;
def XSCVSPDP : XX2Form<60, 329,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvspdp $XT, $XB", IIC_VecFP, []>;
def XSCVSXDDP : XX2Form<60, 376,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvsxddp $XT, $XB", IIC_VecFP, []>;
def XSCVUXDDP : XX2Form<60, 360,
(outs vsrc:$XT), (ins vsrc:$XB),
"xscvuxddp $XT, $XB", IIC_VecFP, []>;
def XVCVDPSP : XX2Form<60, 393,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsp $XT, $XB", IIC_VecFP, []>;
def XVCVDPSXDS : XX2Form<60, 472,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxds $XT, $XB", IIC_VecFP, []>;
def XVCVDPSXWS : XX2Form<60, 216,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
def XVCVDPUXDS : XX2Form<60, 456,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxds $XT, $XB", IIC_VecFP, []>;
def XVCVDPUXWS : XX2Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
def XVCVSPDP : XX2Form<60, 457,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspdp $XT, $XB", IIC_VecFP, []>;
def XVCVSPSXDS : XX2Form<60, 408,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspsxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPSXWS : XX2Form<60, 152,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspsxws $XT, $XB", IIC_VecFP, []>;
def XVCVSPUXDS : XX2Form<60, 392,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPUXWS : XX2Form<60, 136,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxws $XT, $XB", IIC_VecFP, []>;
def XVCVSXDDP : XX2Form<60, 504,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxddp $XT, $XB", IIC_VecFP, []>;
def XVCVSXDSP : XX2Form<60, 440,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
def XVCVSXWDP : XX2Form<60, 248,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
def XVCVSXWSP : XX2Form<60, 184,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
def XVCVUXDDP : XX2Form<60, 488,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxddp $XT, $XB", IIC_VecFP, []>;
def XVCVUXDSP : XX2Form<60, 424,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
def XVCVUXWDP : XX2Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
def XVCVUXWSP : XX2Form<60, 168,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
// Rounding Instructions
// Mnemonic suffix to selected DAG node, as given by the patterns below:
//   (none) -> frnd, "c" -> fnearbyint, "m" -> ffloor, "p" -> fceil,
//   "z" -> ftrunc.
// xsrdp* operate on f64, xvrdp* on v2f64 and xvrsp* on v4f32.
def XSRDPI : XX2Form<60, 73,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
[(set f64:$XT, (frnd f64:$XB))]>;
def XSRDPIC : XX2Form<60, 107,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
[(set f64:$XT, (ffloor f64:$XB))]>;
def XSRDPIP : XX2Form<60, 105,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrdpip $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fceil f64:$XB))]>;
def XSRDPIZ : XX2Form<60, 89,
(outs vsrc:$XT), (ins vsrc:$XB),
"xsrdpiz $XT, $XB", IIC_VecFP,
[(set f64:$XT, (ftrunc f64:$XB))]>;
def XVRDPI : XX2Form<60, 201,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (frnd v2f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
def XVRDPIM : XX2Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpim $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (ffloor v2f64:$XB))]>;
def XVRDPIP : XX2Form<60, 233,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpip $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (fceil v2f64:$XB))]>;
def XVRDPIZ : XX2Form<60, 217,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpiz $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
def XVRSPI : XX2Form<60, 137,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (frnd v4f32:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
def XVRSPIM : XX2Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspim $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (ffloor v4f32:$XB))]>;
def XVRSPIP : XX2Form<60, 169,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspip $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fceil v4f32:$XB))]>;
def XVRSPIZ : XX2Form<60, 153,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspiz $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
// Max/Min Instructions
// Two-source XX3Form instructions with empty pattern lists; nothing in this
// file selects them yet.
def XSMAXDP : XX3Form<60, 160,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
def XSMINDP : XX3Form<60, 168,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
def XVMAXDP : XX3Form<60, 224,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
def XVMINDP : XX3Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
def XVMAXSP : XX3Form<60, 192,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
def XVMINSP : XX3Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
} // Uses = [RM]
// Logical Instructions
// These sit outside the `Uses = [RM]` scope that closes just above. None has
// a selection pattern; XXLOR is the opcode PPCInstrInfo::copyPhysReg picks
// for VSRC-to-VSRC register copies.
def XXLAND : XX3Form<60, 130,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxland $XT, $XA, $XB", IIC_VecGeneral, []>;
def XXLANDC : XX3Form<60, 138,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlandc $XT, $XA, $XB", IIC_VecGeneral, []>;
def XXLNOR : XX3Form<60, 162,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlnor $XT, $XA, $XB", IIC_VecGeneral, []>;
def XXLOR : XX3Form<60, 146,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
def XXLXOR : XX3Form<60, 154,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlxor $XT, $XA, $XB", IIC_VecGeneral, []>;
// Permutation Instructions
// No selection patterns here. XXPERMDI, XXSLDWI and XXSPLTW take a u2imm
// immediate as their last operand; XXPERMDI is also the target of several
// InstAliases and of the vector_extract pattern later in this file.
def XXMRGHW : XX3Form<60, 18,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
def XXMRGLW : XX3Form<60, 50,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
def XXSEL : XX4Form<60, 3,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
def XXSLDWI : XX3Form_2<60, 2,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
"xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
def XXSPLTW : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // neverHasSideEffects
} // AddedComplexity
// Extended mnemonics.
// xvmovdp / xvmovsp: copy-sign with the same register as both sources.
def : InstAlias<"xvmovdp $XT, $XB",
(XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
def : InstAlias<"xvmovsp $XT, $XB",
(XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
// The remaining aliases are all XXPERMDI with a fixed DM immediate:
//   xxspltd ..., 0 -> DM 0 and xxspltd ..., 1 -> DM 3, both sources $XB
//   xxmrghd -> DM 0, xxmrgld -> DM 3, with distinct $XA and $XB
//   xxswapd -> DM 2, both sources $XB
def : InstAlias<"xxspltd $XT, $XB, 0",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
def : InstAlias<"xxspltd $XT, $XB, 1",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
def : InstAlias<"xxmrghd $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
def : InstAlias<"xxmrgld $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
// scalar_to_vector: insert the f64 as the sub_64 subregister of an otherwise
// undefined v2f64.
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), $A, sub_64)>;
// Extracting element 0: constrain the vector to VSLRC and read sub_64.
def : Pat<(f64 (vector_extract v2f64:$S, 0)),
(EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS $S, VSLRC)), sub_64)>;
// Extracting element 1: permute with XXPERMDI $S, $S, 3 first, then read
// sub_64 of the VSLRC-constrained result as for element 0.
def : Pat<(f64 (vector_extract v2f64:$S, 1)),
(EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 3),
VSLRC)), sub_64)>;
// Additional fnmsub patterns: -a*c + b == -(a*c - b)
// A negated multiplicand (in either position) maps onto the *NMSUBA*
// instruction with $B as the tied accumulator operand.
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
(XSNMSUBADP $B, $C, $A)>;
def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
(XSNMSUBADP $B, $C, $A)>;
def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
(XVNMSUBADP $B, $C, $A)>;
def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
(XVNMSUBADP $B, $C, $A)>;
def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
(XVNMSUBASP $B, $C, $A)>;
def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
(XVNMSUBASP $B, $C, $A)>;
// Bitcasts between v2f64 and the integer vector types are pure register-class
// changes: to VSRC when producing v2f64, to VRRC when producing an integer
// vector.
def : Pat<(v2f64 (bitconvert v4i32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v8i16:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v16i8:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v4i32 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v8i16 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v16i8 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
} // AddedComplexity
} // HasVSX

View File

@ -229,7 +229,11 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID: case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID: case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID: case PPC::VRRCRegClassID:
case PPC::VSLRCRegClassID:
case PPC::VSHRCRegClassID:
return 32 - DefaultSafety; return 32 - DefaultSafety;
case PPC::VSRCRegClassID:
return 64 - DefaultSafety;
case PPC::CRRCRegClassID: case PPC::CRRCRegClassID:
return 8 - DefaultSafety; return 8 - DefaultSafety;
} }

View File

@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>; def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>; def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>; def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>;
def sub_128 : SubRegIndex<128>;
} }
@ -52,6 +54,23 @@ class VR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num; let HWEncoding{4-0} = num;
} }
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
// floating-point registers.
//   SubReg - the FPR this register contains; it is the register's only
//            subregister and is reached through the sub_64 index.
//   n      - the assembly name of the register.
// The hardware encoding is copied unchanged from the contained FPR.
class VSRL<FPR SubReg, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_64];
}
// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
// registers.
//   SubReg - the VR this register contains; it is the register's only
//            subregister and is reached through the sub_128 index.
//   n      - the assembly name of the register.
// The low five encoding bits are taken from the contained VR and bit 5 is
// forced to 1, so these registers encode as the VR's number plus 32.
class VSRH<VR SubReg, string n> : PPCReg<n> {
let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
let HWEncoding{5} = 1;
let SubRegs = [SubReg];
let SubRegIndices = [sub_128];
}
// CR - One of the 8 4-bit condition registers // CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num; let HWEncoding{2-0} = num;
@ -86,6 +105,16 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
} }
// VSX registers
// VSL0-VSL31 wrap F0-F31, are printed as vs0-vs31, and reuse the DWARF
// register numbers of the corresponding FPRs.
foreach Index = 0-31 in {
def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
DwarfRegAlias<!cast<FPR>("F"#Index)>;
}
// VSH0-VSH31 wrap V0-V31, are printed as vs32-vs63 (Index + 32), and reuse the
// DWARF register numbers of the corresponding vector registers.
foreach Index = 0-31 in {
def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
DwarfRegAlias<!cast<VR>("V"#Index)>;
}
// The representation of r0 when treated as the constant 0. // The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">; def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">; def ZERO8 : GP8<ZERO, "0">;
@ -204,6 +233,20 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
// VSX register classes (the allocation order mirrors that of the corresponding
// subregister classes).
// All three classes accept the same value types: v4i32, v4f32, f64 and v2f64.
// VSLRC: the FPR-overlapping registers, listed as VSL0-VSL13 followed by
// VSL31 down to VSL14.
def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
(add (sequence "VSL%u", 0, 13),
(sequence "VSL%u", 31, 14))>;
// VSHRC: the VR-overlapping registers, listed explicitly in allocation order.
def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
(add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
VSH22, VSH21, VSH20)>;
// VSRC: all 64 VSX registers, the members of VSLRC followed by those of VSHRC.
def VSRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
(add VSLRC, VSHRC)>;
def CRBITRC : RegisterClass<"PPC", [i1], 32, def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN, (add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN, CR3LT, CR3GT, CR3EQ, CR3UN,

View File

@ -88,6 +88,7 @@ void PPCSubtarget::initializeEnvironment() {
UseCRBits = false; UseCRBits = false;
HasAltivec = false; HasAltivec = false;
HasQPX = false; HasQPX = false;
HasVSX = false;
HasFCPSGN = false; HasFCPSGN = false;
HasFSQRT = false; HasFSQRT = false;
HasFRE = false; HasFRE = false;

View File

@ -178,6 +178,7 @@ public:
bool hasFPCVT() const { return HasFPCVT; } bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; } bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; } bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool hasMFOCRF() const { return HasMFOCRF; } bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; } bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasPOPCNTD() const { return HasPOPCNTD; }

View File

@ -160,6 +160,8 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify()); addPass(createPPCCTRLoopsVerify());
#endif #endif
addPass(createPPCVSXCopyPass());
return false; return false;
} }

View File

@ -139,7 +139,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec()) if (Vector && !ST->hasAltivec())
return 0; return 0;
return 32; return ST->hasVSX() ? 64 : 32;
} }
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
@ -208,6 +208,14 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode"); assert(ISD && "Invalid opcode");
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
// Double-precision scalars are already located in index #0.
if (Index == 0)
return 0;
return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
}
// Estimated cost of a load-hit-store delay. This was obtained // Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable // experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be // vectorization for the paq8p benchmark. It may need to be

View File

@ -0,0 +1,46 @@
; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Scalar double multiply: expects the VSX instruction xsmuldp on registers
; 1, 1, 2, followed by the return.
define double @test1(double %a, double %b) {
entry:
%v = fmul double %a, %b
ret double %v
; CHECK-LABEL: @test1
; CHECK: xsmuldp 1, 1, 2
; CHECK: blr
}
; Scalar double divide: expects the VSX instruction xsdivdp on registers
; 1, 1, 2, followed by the return.
define double @test2(double %a, double %b) {
entry:
%v = fdiv double %a, %b
ret double %v
; CHECK-LABEL: @test2
; CHECK: xsdivdp 1, 1, 2
; CHECK: blr
}
; Scalar double add: expects the VSX instruction xsadddp on registers
; 1, 1, 2, followed by the return.
define double @test3(double %a, double %b) {
entry:
%v = fadd double %a, %b
ret double %v
; CHECK-LABEL: @test3
; CHECK: xsadddp 1, 1, 2
; CHECK: blr
}
; Vector <2 x double> add: expects xvadddp with source registers 34 and 35.
; The destination register number is matched by a regex and so is left
; unconstrained (see the FIXME below about the return-value ABI).
define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
entry:
%v = fadd <2 x double> %a, %b
ret <2 x double> %v
; FIXME: Check that the ABI for the return value is correct here!
; CHECK-LABEL: @test4
; CHECK: xvadddp {{[0-9]+}}, 34, 35
; CHECK: blr
}

View File

@ -0,0 +1,452 @@
# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
# CHECK: lxsdx 7, 5, 31
0x7c 0xe5 0xfc 0x98
# CHECK: lxvd2x 7, 5, 31
0x7c 0xe5 0xfe 0x98
# CHECK: lxvdsx 7, 5, 31
0x7c 0xe5 0xfa 0x98
# CHECK: lxvw4x 7, 5, 31
0x7c 0xe5 0xfe 0x18
# CHECK: stxsdx 8, 5, 31
0x7d 0x05 0xfd 0x98
# CHECK: stxvd2x 8, 5, 31
0x7d 0x05 0xff 0x98
# CHECK: stxvw4x 8, 5, 31
0x7d 0x05 0xff 0x18
# CHECK: xsabsdp 7, 27
0xf0 0xe0 0xdd 0x64
# CHECK: xsadddp 7, 63, 27
0xf0 0xff 0xd9 0x04
# CHECK: xscmpodp 6, 63, 27
0xf3 0x1f 0xd9 0x5c
# CHECK: xscmpudp 6, 63, 27
0xf3 0x1f 0xd9 0x1c
# CHECK: xscpsgndp 7, 63, 27
0xf0 0xff 0xdd 0x84
# CHECK: xscvdpsp 7, 27
0xf0 0xe0 0xdc 0x24
# CHECK: xscvdpsxds 7, 27
0xf0 0xe0 0xdd 0x60
# CHECK: xscvdpsxws 7, 27
0xf0 0xe0 0xd9 0x60
# CHECK: xscvdpuxds 7, 27
0xf0 0xe0 0xdd 0x20
# CHECK: xscvdpuxws 7, 27
0xf0 0xe0 0xd9 0x20
# CHECK: xscvspdp 7, 27
0xf0 0xe0 0xdd 0x24
# CHECK: xscvsxddp 7, 27
0xf0 0xe0 0xdd 0xe0
# CHECK: xscvuxddp 7, 27
0xf0 0xe0 0xdd 0xa0
# CHECK: xsdivdp 7, 63, 27
0xf0 0xff 0xd9 0xc4
# CHECK: xsmaddadp 7, 63, 27
0xf0 0xff 0xd9 0x0c
# CHECK: xsmaddmdp 7, 63, 27
0xf0 0xff 0xd9 0x4c
# CHECK: xsmaxdp 7, 63, 27
0xf0 0xff 0xdd 0x04
# CHECK: xsmindp 7, 63, 27
0xf0 0xff 0xdd 0x44
# CHECK: xsmsubadp 7, 63, 27
0xf0 0xff 0xd9 0x8c
# CHECK: xsmsubmdp 7, 63, 27
0xf0 0xff 0xd9 0xcc
# CHECK: xsmuldp 7, 63, 27
0xf0 0xff 0xd9 0x84
# CHECK: xsnabsdp 7, 27
0xf0 0xe0 0xdd 0xa4
# CHECK: xsnegdp 7, 27
0xf0 0xe0 0xdd 0xe4
# CHECK: xsnmaddadp 7, 63, 27
0xf0 0xff 0xdd 0x0c
# CHECK: xsnmaddmdp 7, 63, 27
0xf0 0xff 0xdd 0x4c
# CHECK: xsnmsubadp 7, 63, 27
0xf0 0xff 0xdd 0x8c
# CHECK: xsnmsubmdp 7, 63, 27
0xf0 0xff 0xdd 0xcc
# CHECK: xsrdpi 7, 27
0xf0 0xe0 0xd9 0x24
# CHECK: xsrdpic 7, 27
0xf0 0xe0 0xd9 0xac
# CHECK: xsrdpim 7, 27
0xf0 0xe0 0xd9 0xe4
# CHECK: xsrdpip 7, 27
0xf0 0xe0 0xd9 0xa4
# CHECK: xsrdpiz 7, 27
0xf0 0xe0 0xd9 0x64
# CHECK: xsredp 7, 27
0xf0 0xe0 0xd9 0x68
# CHECK: xsrsqrtedp 7, 27
0xf0 0xe0 0xd9 0x28
# CHECK: xssqrtdp 7, 27
0xf0 0xe0 0xd9 0x2c
# CHECK: xssubdp 7, 63, 27
0xf0 0xff 0xd9 0x44
# CHECK: xstdivdp 6, 63, 27
0xf3 0x1f 0xd9 0xec
# CHECK: xstsqrtdp 6, 27
0xf3 0x00 0xd9 0xa8
# CHECK: xvabsdp 7, 27
0xf0 0xe0 0xdf 0x64
# CHECK: xvabssp 7, 27
0xf0 0xe0 0xde 0x64
# CHECK: xvadddp 7, 63, 27
0xf0 0xff 0xdb 0x04
# CHECK: xvaddsp 7, 63, 27
0xf0 0xff 0xda 0x04
# CHECK: xvcmpeqdp 7, 63, 27
0xf0 0xff 0xdb 0x1c
# CHECK: xvcmpeqdp. 7, 63, 27
0xf0 0xff 0xdf 0x1c
# CHECK: xvcmpeqsp 7, 63, 27
0xf0 0xff 0xda 0x1c
# CHECK: xvcmpeqsp. 7, 63, 27
0xf0 0xff 0xde 0x1c
# CHECK: xvcmpgedp 7, 63, 27
0xf0 0xff 0xdb 0x9c
# CHECK: xvcmpgedp. 7, 63, 27
0xf0 0xff 0xdf 0x9c
# CHECK: xvcmpgesp 7, 63, 27
0xf0 0xff 0xda 0x9c
# CHECK: xvcmpgesp. 7, 63, 27
0xf0 0xff 0xde 0x9c
# CHECK: xvcmpgtdp 7, 63, 27
0xf0 0xff 0xdb 0x5c
# CHECK: xvcmpgtdp. 7, 63, 27
0xf0 0xff 0xdf 0x5c
# CHECK: xvcmpgtsp 7, 63, 27
0xf0 0xff 0xda 0x5c
# CHECK: xvcmpgtsp. 7, 63, 27
0xf0 0xff 0xde 0x5c
# CHECK: xvcpsgndp 7, 63, 27
0xf0 0xff 0xdf 0x84
# CHECK: xvcpsgnsp 7, 63, 27
0xf0 0xff 0xde 0x84
# CHECK: xvcvdpsp 7, 27
0xf0 0xe0 0xde 0x24
# CHECK: xvcvdpsxds 7, 27
0xf0 0xe0 0xdf 0x60
# CHECK: xvcvdpsxws 7, 27
0xf0 0xe0 0xdb 0x60
# CHECK: xvcvdpuxds 7, 27
0xf0 0xe0 0xdf 0x20
# CHECK: xvcvdpuxws 7, 27
0xf0 0xe0 0xdb 0x20
# CHECK: xvcvspdp 7, 27
0xf0 0xe0 0xdf 0x24
# CHECK: xvcvspsxds 7, 27
0xf0 0xe0 0xde 0x60
# CHECK: xvcvspsxws 7, 27
0xf0 0xe0 0xda 0x60
# CHECK: xvcvspuxds 7, 27
0xf0 0xe0 0xde 0x20
# CHECK: xvcvspuxws 7, 27
0xf0 0xe0 0xda 0x20
# CHECK: xvcvsxddp 7, 27
0xf0 0xe0 0xdf 0xe0
# CHECK: xvcvsxdsp 7, 27
0xf0 0xe0 0xde 0xe0
# CHECK: xvcvsxwdp 7, 27
0xf0 0xe0 0xdb 0xe0
# CHECK: xvcvsxwsp 7, 27
0xf0 0xe0 0xda 0xe0
# CHECK: xvcvuxddp 7, 27
0xf0 0xe0 0xdf 0xa0
# CHECK: xvcvuxdsp 7, 27
0xf0 0xe0 0xde 0xa0
# CHECK: xvcvuxwdp 7, 27
0xf0 0xe0 0xdb 0xa0
# CHECK: xvcvuxwsp 7, 27
0xf0 0xe0 0xda 0xa0
# CHECK: xvdivdp 7, 63, 27
0xf0 0xff 0xdb 0xc4
# CHECK: xvdivsp 7, 63, 27
0xf0 0xff 0xda 0xc4
# CHECK: xvmaddadp 7, 63, 27
0xf0 0xff 0xdb 0x0c
# CHECK: xvmaddasp 7, 63, 27
0xf0 0xff 0xda 0x0c
# CHECK: xvmaddmdp 7, 63, 27
0xf0 0xff 0xdb 0x4c
# CHECK: xvmaddmsp 7, 63, 27
0xf0 0xff 0xda 0x4c
# CHECK: xvmaxdp 7, 63, 27
0xf0 0xff 0xdf 0x04
# CHECK: xvmaxsp 7, 63, 27
0xf0 0xff 0xde 0x04
# CHECK: xvmindp 7, 63, 27
0xf0 0xff 0xdf 0x44
# CHECK: xvminsp 7, 63, 27
0xf0 0xff 0xde 0x44
# FIXME: decode as xvmovdp 7, 63
# CHECK: xvcpsgndp 7, 63, 63
0xf0 0xff 0xff 0x86
# FIXME: decode as xvmovsp 7, 63
# CHECK: xvcpsgnsp 7, 63, 63
0xf0 0xff 0xfe 0x86
# CHECK: xvmsubadp 7, 63, 27
0xf0 0xff 0xdb 0x8c
# CHECK: xvmsubasp 7, 63, 27
0xf0 0xff 0xda 0x8c
# CHECK: xvmsubmdp 7, 63, 27
0xf0 0xff 0xdb 0xcc
# CHECK: xvmsubmsp 7, 63, 27
0xf0 0xff 0xda 0xcc
# CHECK: xvmuldp 7, 63, 27
0xf0 0xff 0xdb 0x84
# CHECK: xvmulsp 7, 63, 27
0xf0 0xff 0xda 0x84
# CHECK: xvnabsdp 7, 27
0xf0 0xe0 0xdf 0xa4
# CHECK: xvnabssp 7, 27
0xf0 0xe0 0xde 0xa4
# CHECK: xvnegdp 7, 27
0xf0 0xe0 0xdf 0xe4
# CHECK: xvnegsp 7, 27
0xf0 0xe0 0xde 0xe4
# CHECK: xvnmaddadp 7, 63, 27
0xf0 0xff 0xdf 0x0c
# CHECK: xvnmaddasp 7, 63, 27
0xf0 0xff 0xde 0x0c
# CHECK: xvnmaddmdp 7, 63, 27
0xf0 0xff 0xdf 0x4c
# CHECK: xvnmaddmsp 7, 63, 27
0xf0 0xff 0xde 0x4c
# CHECK: xvnmsubadp 7, 63, 27
0xf0 0xff 0xdf 0x8c
# CHECK: xvnmsubasp 7, 63, 27
0xf0 0xff 0xde 0x8c
# CHECK: xvnmsubmdp 7, 63, 27
0xf0 0xff 0xdf 0xcc
# CHECK: xvnmsubmsp 7, 63, 27
0xf0 0xff 0xde 0xcc
# CHECK: xvrdpi 7, 27
0xf0 0xe0 0xdb 0x24
# CHECK: xvrdpic 7, 27
0xf0 0xe0 0xdb 0xac
# CHECK: xvrdpim 7, 27
0xf0 0xe0 0xdb 0xe4
# CHECK: xvrdpip 7, 27
0xf0 0xe0 0xdb 0xa4
# CHECK: xvrdpiz 7, 27
0xf0 0xe0 0xdb 0x64
# CHECK: xvredp 7, 27
0xf0 0xe0 0xdb 0x68
# CHECK: xvresp 7, 27
0xf0 0xe0 0xda 0x68
# CHECK: xvrspi 7, 27
0xf0 0xe0 0xda 0x24
# CHECK: xvrspic 7, 27
0xf0 0xe0 0xda 0xac
# CHECK: xvrspim 7, 27
0xf0 0xe0 0xda 0xe4
# CHECK: xvrspip 7, 27
0xf0 0xe0 0xda 0xa4
# CHECK: xvrspiz 7, 27
0xf0 0xe0 0xda 0x64
# CHECK: xvrsqrtedp 7, 27
0xf0 0xe0 0xdb 0x28
# CHECK: xvrsqrtesp 7, 27
0xf0 0xe0 0xda 0x28
# CHECK: xvsqrtdp 7, 27
0xf0 0xe0 0xdb 0x2c
# CHECK: xvsqrtsp 7, 27
0xf0 0xe0 0xda 0x2c
# CHECK: xvsubdp 7, 63, 27
0xf0 0xff 0xdb 0x44
# CHECK: xvsubsp 7, 63, 27
0xf0 0xff 0xda 0x44
# CHECK: xvtdivdp 6, 63, 27
0xf3 0x1f 0xdb 0xec
# CHECK: xvtdivsp 6, 63, 27
0xf3 0x1f 0xda 0xec
# CHECK: xvtsqrtdp 6, 27
0xf3 0x00 0xdb 0xa8
# CHECK: xvtsqrtsp 6, 27
0xf3 0x00 0xda 0xa8
# CHECK: xxland 7, 63, 27
0xf0 0xff 0xdc 0x14
# CHECK: xxlandc 7, 63, 27
0xf0 0xff 0xdc 0x54
# CHECK: xxlnor 7, 63, 27
0xf0 0xff 0xdd 0x14
# CHECK: xxlor 7, 63, 27
0xf0 0xff 0xdc 0x94
# CHECK: xxlxor 7, 63, 27
0xf0 0xff 0xdc 0xd4
# FIXME: decode as xxmrghd 7, 63, 27
# CHECK: xxpermdi 7, 63, 27, 0
0xf0 0xff 0xd8 0x54
# CHECK: xxmrghw 7, 63, 27
0xf0 0xff 0xd8 0x94
# FIXME: decode as xxmrgld 7, 63, 27
# CHECK: xxpermdi 7, 63, 27, 3
0xf0 0xff 0xdb 0x54
# CHECK: xxmrglw 7, 63, 27
0xf0 0xff 0xd9 0x94
# CHECK: xxpermdi 7, 63, 27, 2
0xf0 0xff 0xda 0x54
# CHECK: xxsel 7, 63, 27, 14
0xf0 0xff 0xdb 0xb4
# CHECK: xxsldwi 7, 63, 27, 1
0xf0 0xff 0xd9 0x14
# FIXME: decode as xxspltd 7, 63, 1
# CHECK: xxpermdi 7, 63, 63, 3
0xf0 0xff 0xfb 0x56
# CHECK: xxspltw 7, 27, 3
0xf0 0xe3 0xda 0x90
# FIXME: decode as xxswapd 7, 63
# CHECK: xxpermdi 7, 63, 63, 2
0xf0 0xff 0xfa 0x56

298
test/MC/PowerPC/vsx.s Normal file
View File

@ -0,0 +1,298 @@
# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck %s
# CHECK: lxsdx 7, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x98]
lxsdx 7, 5, 31
# CHECK: lxvd2x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x98]
lxvd2x 7, 5, 31
# CHECK: lxvdsx 7, 5, 31 # encoding: [0x7c,0xe5,0xfa,0x98]
lxvdsx 7, 5, 31
# CHECK: lxvw4x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x18]
lxvw4x 7, 5, 31
# CHECK: stxsdx 8, 5, 31 # encoding: [0x7d,0x05,0xfd,0x98]
stxsdx 8, 5, 31
# CHECK: stxvd2x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x98]
stxvd2x 8, 5, 31
# CHECK: stxvw4x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x18]
stxvw4x 8, 5, 31
# CHECK: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64]
xsabsdp 7, 27
# CHECK: xsadddp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x04]
xsadddp 7, 63, 27
# CHECK: xscmpodp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x5c]
xscmpodp 6, 63, 27
# CHECK: xscmpudp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x1c]
xscmpudp 6, 63, 27
# CHECK: xscpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x84]
xscpsgndp 7, 63, 27
# CHECK: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
xscvdpsp 7, 27
# CHECK: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
xscvdpsxds 7, 27
# CHECK: xscvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x60]
xscvdpsxws 7, 27
# CHECK: xscvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x20]
xscvdpuxds 7, 27
# CHECK: xscvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x20]
xscvdpuxws 7, 27
# CHECK: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
xscvspdp 7, 27
# CHECK: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
xscvsxddp 7, 27
# CHECK: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
xscvuxddp 7, 27
# CHECK: xsdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xc4]
xsdivdp 7, 63, 27
# CHECK: xsmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x0c]
xsmaddadp 7, 63, 27
# CHECK: xsmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x4c]
xsmaddmdp 7, 63, 27
# CHECK: xsmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x04]
xsmaxdp 7, 63, 27
# CHECK: xsmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x44]
xsmindp 7, 63, 27
# CHECK: xsmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x8c]
xsmsubadp 7, 63, 27
# CHECK: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
xsmsubmdp 7, 63, 27
# CHECK: xsmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x84]
xsmuldp 7, 63, 27
# CHECK: xsnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa4]
xsnabsdp 7, 27
# CHECK: xsnegdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe4]
xsnegdp 7, 27
# CHECK: xsnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x0c]
xsnmaddadp 7, 63, 27
# CHECK: xsnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x4c]
xsnmaddmdp 7, 63, 27
# CHECK: xsnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x8c]
xsnmsubadp 7, 63, 27
# CHECK: xsnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0xcc]
xsnmsubmdp 7, 63, 27
# CHECK: xsrdpi 7, 27 # encoding: [0xf0,0xe0,0xd9,0x24]
xsrdpi 7, 27
# CHECK: xsrdpic 7, 27 # encoding: [0xf0,0xe0,0xd9,0xac]
xsrdpic 7, 27
# CHECK: xsrdpim 7, 27 # encoding: [0xf0,0xe0,0xd9,0xe4]
xsrdpim 7, 27
# CHECK: xsrdpip 7, 27 # encoding: [0xf0,0xe0,0xd9,0xa4]
xsrdpip 7, 27
# CHECK: xsrdpiz 7, 27 # encoding: [0xf0,0xe0,0xd9,0x64]
xsrdpiz 7, 27
# CHECK: xsredp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x68]
xsredp 7, 27
# CHECK: xsrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x28]
xsrsqrtedp 7, 27
# CHECK: xssqrtdp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x2c]
xssqrtdp 7, 27
# CHECK: xssubdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x44]
xssubdp 7, 63, 27
# CHECK: xstdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0xec]
xstdivdp 6, 63, 27
# CHECK: xstsqrtdp 6, 27 # encoding: [0xf3,0x00,0xd9,0xa8]
xstsqrtdp 6, 27
# CHECK: xvabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x64]
xvabsdp 7, 27
# CHECK: xvabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0x64]
xvabssp 7, 27
# CHECK: xvadddp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x04]
xvadddp 7, 63, 27
# CHECK: xvaddsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x04]
xvaddsp 7, 63, 27
# CHECK: xvcmpeqdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x1c]
xvcmpeqdp 7, 63, 27
# CHECK: xvcmpeqdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x1c]
xvcmpeqdp. 7, 63, 27
# CHECK: xvcmpeqsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x1c]
xvcmpeqsp 7, 63, 27
# CHECK: xvcmpeqsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x1c]
xvcmpeqsp. 7, 63, 27
# CHECK: xvcmpgedp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x9c]
xvcmpgedp 7, 63, 27
# CHECK: xvcmpgedp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x9c]
xvcmpgedp. 7, 63, 27
# CHECK: xvcmpgesp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x9c]
xvcmpgesp 7, 63, 27
# CHECK: xvcmpgesp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x9c]
xvcmpgesp. 7, 63, 27
# CHECK: xvcmpgtdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x5c]
xvcmpgtdp 7, 63, 27
# CHECK: xvcmpgtdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x5c]
xvcmpgtdp. 7, 63, 27
# CHECK: xvcmpgtsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x5c]
xvcmpgtsp 7, 63, 27
# CHECK: xvcmpgtsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x5c]
xvcmpgtsp. 7, 63, 27
# CHECK: xvcpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x84]
xvcpsgndp 7, 63, 27
# CHECK: xvcpsgnsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x84]
xvcpsgnsp 7, 63, 27
# CHECK: xvcvdpsp 7, 27 # encoding: [0xf0,0xe0,0xde,0x24]
xvcvdpsp 7, 27
# CHECK: xvcvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x60]
xvcvdpsxds 7, 27
# CHECK: xvcvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x60]
xvcvdpsxws 7, 27
# CHECK: xvcvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x20]
xvcvdpuxds 7, 27
# CHECK: xvcvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x20]
xvcvdpuxws 7, 27
# CHECK: xvcvspdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x24]
xvcvspdp 7, 27
# CHECK: xvcvspsxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x60]
xvcvspsxds 7, 27
# CHECK: xvcvspsxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x60]
xvcvspsxws 7, 27
# CHECK: xvcvspuxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x20]
xvcvspuxds 7, 27
# CHECK: xvcvspuxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x20]
xvcvspuxws 7, 27
# CHECK: xvcvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe0]
xvcvsxddp 7, 27
# CHECK: xvcvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe0]
xvcvsxdsp 7, 27
# CHECK: xvcvsxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe0]
xvcvsxwdp 7, 27
# CHECK: xvcvsxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xe0]
xvcvsxwsp 7, 27
# CHECK: xvcvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa0]
xvcvuxddp 7, 27
# CHECK: xvcvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa0]
xvcvuxdsp 7, 27
# CHECK: xvcvuxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa0]
xvcvuxwdp 7, 27
# CHECK: xvcvuxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xa0]
xvcvuxwsp 7, 27
# CHECK: xvdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xc4]
xvdivdp 7, 63, 27
# CHECK: xvdivsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xc4]
xvdivsp 7, 63, 27
# CHECK: xvmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x0c]
xvmaddadp 7, 63, 27
# CHECK: xvmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x0c]
xvmaddasp 7, 63, 27
# CHECK: xvmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x4c]
xvmaddmdp 7, 63, 27
# CHECK: xvmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x4c]
xvmaddmsp 7, 63, 27
# CHECK: xvmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x04]
xvmaxdp 7, 63, 27
# CHECK: xvmaxsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x04]
xvmaxsp 7, 63, 27
# CHECK: xvmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x44]
xvmindp 7, 63, 27
# CHECK: xvminsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x44]
xvminsp 7, 63, 27
# CHECK: xvcpsgndp 7, 63, 63 # encoding: [0xf0,0xff,0xff,0x86]
xvmovdp 7, 63
# CHECK: xvcpsgnsp 7, 63, 63 # encoding: [0xf0,0xff,0xfe,0x86]
xvmovsp 7, 63
# CHECK: xvmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x8c]
xvmsubadp 7, 63, 27
# CHECK: xvmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x8c]
xvmsubasp 7, 63, 27
# CHECK: xvmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xcc]
xvmsubmdp 7, 63, 27
# CHECK: xvmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xcc]
xvmsubmsp 7, 63, 27
# CHECK: xvmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x84]
xvmuldp 7, 63, 27
# CHECK: xvmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x84]
xvmulsp 7, 63, 27
# CHECK: xvnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa4]
xvnabsdp 7, 27
# CHECK: xvnabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa4]
xvnabssp 7, 27
# CHECK: xvnegdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe4]
xvnegdp 7, 27
# CHECK: xvnegsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe4]
xvnegsp 7, 27
# CHECK: xvnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x0c]
xvnmaddadp 7, 63, 27
# CHECK: xvnmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x0c]
xvnmaddasp 7, 63, 27
# CHECK: xvnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x4c]
xvnmaddmdp 7, 63, 27
# CHECK: xvnmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x4c]
xvnmaddmsp 7, 63, 27
# CHECK: xvnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x8c]
xvnmsubadp 7, 63, 27
# CHECK: xvnmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x8c]
xvnmsubasp 7, 63, 27
# CHECK: xvnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0xcc]
xvnmsubmdp 7, 63, 27
# CHECK: xvnmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0xcc]
xvnmsubmsp 7, 63, 27
# CHECK: xvrdpi 7, 27 # encoding: [0xf0,0xe0,0xdb,0x24]
xvrdpi 7, 27
# CHECK: xvrdpic 7, 27 # encoding: [0xf0,0xe0,0xdb,0xac]
xvrdpic 7, 27
# CHECK: xvrdpim 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe4]
xvrdpim 7, 27
# CHECK: xvrdpip 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa4]
xvrdpip 7, 27
# CHECK: xvrdpiz 7, 27 # encoding: [0xf0,0xe0,0xdb,0x64]
xvrdpiz 7, 27
# CHECK: xvredp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x68]
xvredp 7, 27
# CHECK: xvresp 7, 27 # encoding: [0xf0,0xe0,0xda,0x68]
xvresp 7, 27
# CHECK: xvrspi 7, 27 # encoding: [0xf0,0xe0,0xda,0x24]
xvrspi 7, 27
# CHECK: xvrspic 7, 27 # encoding: [0xf0,0xe0,0xda,0xac]
xvrspic 7, 27
# CHECK: xvrspim 7, 27 # encoding: [0xf0,0xe0,0xda,0xe4]
xvrspim 7, 27
# CHECK: xvrspip 7, 27 # encoding: [0xf0,0xe0,0xda,0xa4]
xvrspip 7, 27
# CHECK: xvrspiz 7, 27 # encoding: [0xf0,0xe0,0xda,0x64]
xvrspiz 7, 27
# CHECK: xvrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x28]
xvrsqrtedp 7, 27
# CHECK: xvrsqrtesp 7, 27 # encoding: [0xf0,0xe0,0xda,0x28]
xvrsqrtesp 7, 27
# CHECK: xvsqrtdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x2c]
xvsqrtdp 7, 27
# CHECK: xvsqrtsp 7, 27 # encoding: [0xf0,0xe0,0xda,0x2c]
xvsqrtsp 7, 27
# CHECK: xvsubdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x44]
xvsubdp 7, 63, 27
# CHECK: xvsubsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x44]
xvsubsp 7, 63, 27
# CHECK: xvtdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xdb,0xec]
xvtdivdp 6, 63, 27
# CHECK: xvtdivsp 6, 63, 27 # encoding: [0xf3,0x1f,0xda,0xec]
xvtdivsp 6, 63, 27
# CHECK: xvtsqrtdp 6, 27 # encoding: [0xf3,0x00,0xdb,0xa8]
xvtsqrtdp 6, 27
# CHECK: xvtsqrtsp 6, 27 # encoding: [0xf3,0x00,0xda,0xa8]
xvtsqrtsp 6, 27
# CHECK: xxland 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x14]
xxland 7, 63, 27
# CHECK: xxlandc 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x54]
xxlandc 7, 63, 27
# CHECK: xxlnor 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x14]
xxlnor 7, 63, 27
# CHECK: xxlor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x94]
xxlor 7, 63, 27
# CHECK: xxlxor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0xd4]
xxlxor 7, 63, 27
# CHECK: xxpermdi 7, 63, 27, 0 # encoding: [0xf0,0xff,0xd8,0x54]
xxmrghd 7, 63, 27
# CHECK: xxmrghw 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x94]
xxmrghw 7, 63, 27
# CHECK: xxpermdi 7, 63, 27, 3 # encoding: [0xf0,0xff,0xdb,0x54]
xxmrgld 7, 63, 27
# CHECK: xxmrglw 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x94]
xxmrglw 7, 63, 27
# CHECK: xxpermdi 7, 63, 27, 2 # encoding: [0xf0,0xff,0xda,0x54]
xxpermdi 7, 63, 27, 2
# CHECK: xxsel 7, 63, 27, 14 # encoding: [0xf0,0xff,0xdb,0xb4]
xxsel 7, 63, 27, 14
# CHECK: xxsldwi 7, 63, 27, 1 # encoding: [0xf0,0xff,0xd9,0x14]
xxsldwi 7, 63, 27, 1
# CHECK: xxpermdi 7, 63, 63, 3 # encoding: [0xf0,0xff,0xfb,0x56]
xxspltd 7, 63, 1
# CHECK: xxspltw 7, 27, 3 # encoding: [0xf0,0xe3,0xda,0x90]
xxspltw 7, 27, 3
# CHECK: xxpermdi 7, 63, 63, 2 # encoding: [0xf0,0xff,0xfa,0x56]
xxswapd 7, 63