From 308873bcb80c04c8c53ab97bb4f698d50279fdbe Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Thu, 7 May 2015 18:24:05 +0000 Subject: [PATCH] Add VSX Scalar loads and stores to the PPC back end This patch corresponds to review: http://reviews.llvm.org/D9440 It adds a new register class to the PPC back end to contain single precision values in VSX registers. Additionally, it adds scalar loads and stores for VSX registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 24 +++ .../PowerPC/Disassembler/PPCDisassembler.cpp | 26 ++++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 7 +- lib/Target/PowerPC/PPCISelLowering.cpp | 21 ++- lib/Target/PowerPC/PPCInstrInfo.cpp | 13 +- lib/Target/PowerPC/PPCInstrVSX.td | 61 +++++++- lib/Target/PowerPC/PPCRegisterInfo.cpp | 3 + lib/Target/PowerPC/PPCRegisterInfo.td | 3 + test/CodeGen/PowerPC/ppc64le-smallarg.ll | 6 +- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll | 139 ++++++++++++++++++ test/MC/Disassembler/PowerPC/vsx.txt | 15 ++ test/MC/PowerPC/vsx.s | 43 ++++-- 12 files changed, 337 insertions(+), 24 deletions(-) create mode 100644 test/CodeGen/PowerPC/vsx_scalar_ld_st.ll diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index b6f10e61079..8280f74c063 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -132,6 +132,25 @@ static const MCPhysReg VSFRegs[64] = { PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 }; +static const MCPhysReg VSSRegs[64] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + 
PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static unsigned QFRegs[32] = { PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, @@ -577,6 +596,11 @@ public: Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); } + void addRegVSSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSSRegs[getVSReg()])); + } + void addRegQFRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 5cbf3d9a189..9a5c829aa90 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -131,6 +131,26 @@ static const unsigned VSFRegs[] = { PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 }; +static const unsigned VSSRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, 
PPC::VF31 +}; + static const unsigned GPRegs[] = { PPC::R0, PPC::R1, PPC::R2, PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, @@ -231,6 +251,12 @@ static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, VSFRegs); } +static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSSRegs); +} + static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 512eddcb0da..afc1f36ad15 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2734,7 +2734,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; else if (N->getValueType(0) == MVT::f32) - SelectCCOp = PPC::SELECT_CC_F4; + if (PPCSubTarget->hasP8Vector()) + SelectCCOp = PPC::SELECT_CC_VSSRC; + else + SelectCCOp = PPC::SELECT_CC_F4; else if (N->getValueType(0) == MVT::f64) if (PPCSubTarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; @@ -3449,6 +3452,7 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: + case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: { SDValue Op = MachineNode->getOperand(0); if (Op.isMachineOpcode()) { @@ -3759,6 +3763,7 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: + case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: if (Op1Set) ResNode = MachineNode->getOperand(1).getNode(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 33688dc3c08..61fc0c92cb9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -582,6 +582,9 @@ PPCTargetLowering::PPCTargetLowering(const 
PPCTargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) + addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); @@ -2680,7 +2683,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::GPRCRegClass; break; case MVT::f32: - RC = &PPC::F4RCRegClass; + if (Subtarget.hasP8Vector()) + RC = &PPC::VSSRCRegClass; + else + RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) @@ -3094,7 +3100,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned VReg; if (ObjectVT == MVT::f32) - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], + Subtarget.hasP8Vector() + ? &PPC::VSSRCRegClass + : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass @@ -8383,6 +8392,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_QBRC || MI->getOpcode() == PPC::SELECT_CC_VRRC || MI->getOpcode() == PPC::SELECT_CC_VSFRC || + MI->getOpcode() == PPC::SELECT_CC_VSSRC || MI->getOpcode() == PPC::SELECT_CC_VSRC || MI->getOpcode() == PPC::SELECT_I4 || MI->getOpcode() == PPC::SELECT_I8 || @@ -8393,6 +8403,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSSRC || MI->getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -8429,6 +8440,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSSRC || MI->getOpcode() == PPC::SELECT_VSRC) { 
BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -10648,7 +10660,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Constraint == "wf") { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws") { - return std::make_pair(0U, &PPC::VSFRCRegClass); + if (VT == MVT::f32) + return std::make_pair(0U, &PPC::VSSRCRegClass); + else + return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index c9c2949dc6c..85ba5a1d640 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -815,7 +815,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // copies are generated, they are close enough to some use that the // lower-latency form is preferable. Opc = PPC::XXLOR; - else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) + else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || + PPC::VSSRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::XXLORf; else if (PPC::QFRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::QVFMR; @@ -900,6 +901,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -1013,6 +1020,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg), + FrameIdx)); + NonRI = true; } else if 
(PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 3cff14c594f..d93fd5e27d6 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -40,6 +40,13 @@ def vsfrc : RegisterOperand { let ParserMatchClass = PPCRegVSFRCAsmOperand; } +def PPCRegVSSRCAsmOperand : AsmOperandClass { + let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vssrc : RegisterOperand { + let ParserMatchClass = PPCRegVSSRCAsmOperand; +} + // Little-endian-specific nodes. def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -103,7 +110,7 @@ let Uses = [RM] in { (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; - } + } // mayLoad // Store indexed instructions let mayStore = 1 in { @@ -121,7 +128,8 @@ let Uses = [RM] in { (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, [(store v4i32:$XT, xoaddr:$dst)]>; - } + + } // mayStore // Add/Mul Instructions let isCommutable = 1 in { @@ -791,6 +799,15 @@ let usesCustomInserter = 1, // Expanded after instruction selection. 
"#SELECT_VSFRC", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crrc:$cond, f4rc:$T, f4rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSSRC", + []>; + def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crbitrc:$cond, f4rc:$T, f4rc:$F), + "#SELECT_VSSRC", + [(set f32:$dst, + (select i1:$cond, f32:$T, f32:$F))]>; } // usesCustomInserter } // AddedComplexity @@ -987,7 +1004,45 @@ def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; -} // AddedComplexity = 500 + // VSX scalar loads introduced in ISA 2.07 + let mayLoad = 1 in { + def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), + "lxsspx $XT, $src", IIC_LdStLFD, + [(set f32:$XT, (load xoaddr:$src))]>; + def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwax $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwzx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + } // mayLoad + + // VSX scalar stores introduced in ISA 2.07 + let mayStore = 1 in { + def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), + "stxsspx $XT, $dst", IIC_LdStSTFD, + [(store f32:$XT, xoaddr:$dst)]>; + def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsiwx $XT, $dst", IIC_LdStSTFD, + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + } // mayStore +def : Pat<(f64 (extloadf32 xoaddr:$src)), + (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; +def : Pat<(f64 (fextend f32:$src)), + (COPY_TO_REGCLASS $src, VSFRC)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, 
f32:$tval, f32:$fval, SETEQ)), + (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; +} // AddedComplexity = 400 } // HasP8Vector let Predicates = [HasDirectMove, HasVSX] in { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0e568d3278e..656376c641a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -282,6 +282,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, return 32 - DefaultSafety; case PPC::VSRCRegClassID: case PPC::VSFRCRegClassID: + case PPC::VSSRCRegClassID: return 64 - DefaultSafety; case PPC::CRRCRegClassID: return 8 - DefaultSafety; @@ -300,6 +301,8 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, return &PPC::VSFRCRegClass; else if (RC == &PPC::VRRCRegClass) return &PPC::VSRCRegClass; + else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector()) + return &PPC::VSSRCRegClass; } return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 398be783881..e5f363c443c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -316,6 +316,9 @@ def VFRC : RegisterClass<"PPC", [f64], 64, VF22, VF21, VF20)>; def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; +// Register class for single precision scalars in VSX registers +def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>; + // For QPX def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13), (sequence "QF%u", 
31, 14))>; diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll index 77d066363cd..070a617ffe4 100644 --- a/test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -42,7 +42,8 @@ entry: ret float %x } ; CHECK: @callee2 -; CHECK: lfs {{[0-9]+}}, 136(1) +; CHECK: addi [[TOCREG:[0-9]+]], 1, 136 +; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]] ; CHECK: blr define void @caller2() { @@ -52,7 +53,8 @@ entry: ret void } ; CHECK: @caller2 -; CHECK: stfs {{[0-9]+}}, 136(1) +; CHECK: li [[TOCOFF:[0-9]+]], 136 +; CHECK: stxsspx {{[0-9]+}}, 1, [[TOCOFF]] ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll new file mode 100644 index 00000000000..10297088596 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s + +@d = common global double 0.000000e+00, align 8 +@f = common global float 0.000000e+00, align 4 +@i = common global i32 0, align 4 +@ui = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @dblToInt() #0 { +entry: + %ii = alloca i32, align 4 + %0 = load double, double* @d, align 8 + %conv = fptosi double %0 to i32 + store volatile i32 %conv, i32* %ii, align 4 + ret void +; CHECK-LABEL: @dblToInt +; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]], +; CHECK: stxsiwx [[REGCONV1]], +} + +; Function Attrs: nounwind +define void @fltToInt() #0 { +entry: + %ii = alloca i32, align 4 + %0 = load float, float* @f, align 4 + %conv = fptosi float %0 to i32 + store volatile i32 %conv, i32* %ii, align 4 + ret void +; CHECK-LABEL: @fltToInt +; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]], +; CHECK: 
stxsiwx [[REGCONV2]], +} + +; Function Attrs: nounwind +define void @intToDbl() #0 { +entry: + %dd = alloca double, align 8 + %0 = load i32, i32* @i, align 4 + %conv = sitofp i32 %0 to double + store volatile double %conv, double* %dd, align 8 + ret void +; CHECK-LABEL: @intToDbl +; CHECK: lxsiwax [[REGLD1:[0-9]+]], +; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]] +} + +; Function Attrs: nounwind +define void @intToFlt() #0 { +entry: + %ff = alloca float, align 4 + %0 = load i32, i32* @i, align 4 + %conv = sitofp i32 %0 to float + store volatile float %conv, float* %ff, align 4 + ret void +; CHECK-LABEL: @intToFlt +; CHECK: lxsiwax [[REGLD2:[0-9]+]], +; FIXME: the below will change when the VSX form is implemented +; CHECK: fcfids {{[0-9]+}}, [[REGLD2]] +} + +; Function Attrs: nounwind +define void @dblToUInt() #0 { +entry: + %uiui = alloca i32, align 4 + %0 = load double, double* @d, align 8 + %conv = fptoui double %0 to i32 + store volatile i32 %conv, i32* %uiui, align 4 + ret void +; CHECK-LABEL: @dblToUInt +; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]], +; CHECK: stxsiwx [[REGCONV3]], +} + +; Function Attrs: nounwind +define void @fltToUInt() #0 { +entry: + %uiui = alloca i32, align 4 + %0 = load float, float* @f, align 4 + %conv = fptoui float %0 to i32 + store volatile i32 %conv, i32* %uiui, align 4 + ret void +; CHECK-LABEL: @fltToUInt +; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]], +; CHECK: stxsiwx [[REGCONV4]], +} + +; Function Attrs: nounwind +define void @uIntToDbl() #0 { +entry: + %dd = alloca double, align 8 + %0 = load i32, i32* @ui, align 4 + %conv = uitofp i32 %0 to double + store volatile double %conv, double* %dd, align 8 + ret void +; CHECK-LABEL: @uIntToDbl +; CHECK: lxsiwzx [[REGLD3:[0-9]+]], +; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]] +} + +; Function Attrs: nounwind +define void @uIntToFlt() #0 { +entry: + %ff = alloca float, align 4 + %0 = load i32, i32* @ui, align 4 + %conv = uitofp i32 %0 to float + store volatile float %conv, float* %ff, align 4 + ret void 
+; CHECK-LABEL: @uIntToFlt +; CHECK: lxsiwzx [[REGLD4:[0-9]+]], +; FIXME: the below will change when the VSX form is implemented +; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]] +} + +; Function Attrs: nounwind +define void @dblToFloat() #0 { +entry: + %ff = alloca float, align 4 + %0 = load double, double* @d, align 8 + %conv = fptrunc double %0 to float + store volatile float %conv, float* %ff, align 4 + ret void +; CHECK-LABEL: @dblToFloat +; CHECK: lxsdx [[REGLD5:[0-9]+]], +; CHECK: stxsspx [[REGLD5]], +} + +; Function Attrs: nounwind +define void @floatToDbl() #0 { +entry: + %dd = alloca double, align 8 + %0 = load float, float* @f, align 4 + %conv = fpext float %0 to double + store volatile double %conv, double* %dd, align 8 + ret void +; CHECK-LABEL: @floatToDbl +; CHECK: lxsspx [[REGLD5:[0-9]+]], +; CHECK: stxsdx [[REGLD5]], +} diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt index bda25dfd2cb..417efd0fe17 100644 --- a/test/MC/Disassembler/PowerPC/vsx.txt +++ b/test/MC/Disassembler/PowerPC/vsx.txt @@ -3,6 +3,15 @@ # CHECK: lxsdx 7, 5, 31 0x7c 0xe5 0xfc 0x98 +# CHECK: lxsiwax 7, 5, 31 +0x7c 0xe5 0xf8 0x98 + +# CHECK: lxsiwzx 7, 5, 31 +0x7c 0xe5 0xf8 0x18 + +# CHECK: lxsspx 7, 5, 31 +0x7c 0xe5 0xfc 0x18 + # CHECK: lxvd2x 7, 5, 31 0x7c 0xe5 0xfe 0x98 @@ -15,6 +24,12 @@ # CHECK: stxsdx 8, 5, 31 0x7d 0x05 0xfd 0x98 +# CHECK: stxsiwx 8, 5, 31 +0x7d 0x05 0xf9 0x18 + +# CHECK: stxsspx 8, 5, 31 +0x7d 0x05 0xfd 0x18 + # CHECK: stxvd2x 8, 5, 31 0x7d 0x05 0xff 0x98 diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s index f723a3ebc1a..75c934f78c1 100644 --- a/test/MC/PowerPC/vsx.s +++ b/test/MC/PowerPC/vsx.s @@ -5,26 +5,41 @@ # CHECK-LE: xxswapd 7, 63 # encoding: [0x56,0xfa,0xff,0xf0] xxswapd %vs7, %vs63 -# CHECK-BE: lxsdx 39, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x99] -# CHECK-LE: lxsdx 39, 5, 31 # encoding: [0x99,0xfc,0xe5,0x7c] +# CHECK-BE: lxsdx 39, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x99] +# CHECK-LE: lxsdx 39, 5, 31 # encoding: 
[0x99,0xfc,0xe5,0x7c] lxsdx 39, 5, 31 -# CHECK-BE: lxvd2x 39, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x99] -# CHECK-LE: lxvd2x 39, 5, 31 # encoding: [0x99,0xfe,0xe5,0x7c] +# CHECK-BE: lxsiwax 39, 5, 31 # encoding: [0x7c,0xe5,0xf8,0x99] +# CHECK-LE: lxsiwax 39, 5, 31 # encoding: [0x99,0xf8,0xe5,0x7c] + lxsiwax 39, 5, 31 +# CHECK-BE: lxsiwzx 39, 5, 31 # encoding: [0x7c,0xe5,0xf8,0x19] +# CHECK-LE: lxsiwzx 39, 5, 31 # encoding: [0x19,0xf8,0xe5,0x7c] + lxsiwzx 39, 5, 31 +# CHECK-BE: lxsspx 39, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x19] +# CHECK-LE: lxsspx 39, 5, 31 # encoding: [0x19,0xfc,0xe5,0x7c] + lxsspx 39, 5, 31 +# CHECK-BE: lxvd2x 39, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x99] +# CHECK-LE: lxvd2x 39, 5, 31 # encoding: [0x99,0xfe,0xe5,0x7c] lxvd2x 39, 5, 31 -# CHECK-BE: lxvdsx 39, 5, 31 # encoding: [0x7c,0xe5,0xfa,0x99] -# CHECK-LE: lxvdsx 39, 5, 31 # encoding: [0x99,0xfa,0xe5,0x7c] +# CHECK-BE: lxvdsx 39, 5, 31 # encoding: [0x7c,0xe5,0xfa,0x99] +# CHECK-LE: lxvdsx 39, 5, 31 # encoding: [0x99,0xfa,0xe5,0x7c] lxvdsx 39, 5, 31 -# CHECK-BE: lxvw4x 39, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x19] -# CHECK-LE: lxvw4x 39, 5, 31 # encoding: [0x19,0xfe,0xe5,0x7c] +# CHECK-BE: lxvw4x 39, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x19] +# CHECK-LE: lxvw4x 39, 5, 31 # encoding: [0x19,0xfe,0xe5,0x7c] lxvw4x 39, 5, 31 -# CHECK-BE: stxsdx 40, 5, 31 # encoding: [0x7d,0x05,0xfd,0x99] -# CHECK-LE: stxsdx 40, 5, 31 # encoding: [0x99,0xfd,0x05,0x7d] +# CHECK-BE: stxsdx 40, 5, 31 # encoding: [0x7d,0x05,0xfd,0x99] +# CHECK-LE: stxsdx 40, 5, 31 # encoding: [0x99,0xfd,0x05,0x7d] stxsdx 40, 5, 31 -# CHECK-BE: stxvd2x 40, 5, 31 # encoding: [0x7d,0x05,0xff,0x99] -# CHECK-LE: stxvd2x 40, 5, 31 # encoding: [0x99,0xff,0x05,0x7d] +# CHECK-BE: stxsiwx 40, 5, 31 # encoding: [0x7d,0x05,0xf9,0x19] +# CHECK-LE: stxsiwx 40, 5, 31 # encoding: [0x19,0xf9,0x05,0x7d] + stxsiwx 40, 5, 31 +# CHECK-BE: stxsspx 40, 5, 31 # encoding: [0x7d,0x05,0xfd,0x19] +# CHECK-LE: stxsspx 40, 5, 31 # encoding: [0x19,0xfd,0x05,0x7d] + stxsspx 
40, 5, 31 +# CHECK-BE: stxvd2x 40, 5, 31 # encoding: [0x7d,0x05,0xff,0x99] +# CHECK-LE: stxvd2x 40, 5, 31 # encoding: [0x99,0xff,0x05,0x7d] stxvd2x 40, 5, 31 -# CHECK-BE: stxvw4x 40, 5, 31 # encoding: [0x7d,0x05,0xff,0x19] -# CHECK-LE: stxvw4x 40, 5, 31 # encoding: [0x19,0xff,0x05,0x7d] +# CHECK-BE: stxvw4x 40, 5, 31 # encoding: [0x7d,0x05,0xff,0x19] +# CHECK-LE: stxvw4x 40, 5, 31 # encoding: [0x19,0xff,0x05,0x7d] stxvw4x 40, 5, 31 # CHECK-BE: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64] # CHECK-LE: xsabsdp 7, 27 # encoding: [0x64,0xdd,0xe0,0xf0]