diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 4f3f18ed499..d4d636d3479 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -40,8 +40,8 @@ include "SystemZOperands.td"
 include "SystemZPatterns.td"
 include "SystemZInstrFormats.td"
 include "SystemZInstrInfo.td"
-include "SystemZInstrFP.td"
 include "SystemZInstrVector.td"
+include "SystemZInstrFP.td"
 
 def SystemZInstrInfo : InstrInfo {}
 
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 026a75f2140..f16488063af 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
                                  Context);
 }
 
+// MI loads the high part of a vector from memory. Return an instruction
+// that uses replicating vector load Opcode to do the same thing.
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+  return MCInstBuilder(Opcode)
+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(MI->getOperand(2).getImm())
+    .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory. Return an instruction
+// that uses elemental vector store Opcode to do the same thing.
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+  return MCInstBuilder(Opcode)
+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(MI->getOperand(2).getImm())
+    .addReg(MI->getOperand(3).getReg())
+    .addImm(0);
+}
+
 void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCInst LoweredMI;
@@ -158,6 +179,29 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
     break;
 
+  case SystemZ::VLR32:
+  case SystemZ::VLR64:
+    LoweredMI = MCInstBuilder(SystemZ::VLR)
+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+    break;
+
+  case SystemZ::VL32:
+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+    break;
+
+  case SystemZ::VL64:
+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+    break;
+
+  case SystemZ::VST32:
+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+    break;
+
+  case SystemZ::VST64:
+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+    break;
+
   case SystemZ::LFER:
     LoweredMI = MCInstBuilder(SystemZ::VLGVF)
       .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 391cb8c6fc9..ff79a48179f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -91,9 +91,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
     addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
   else
     addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
-  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
-  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
-  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+  if (Subtarget.hasVector()) {
+    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+  } else {
+    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+  }
   addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
 
   if (Subtarget.hasVector()) {
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index efa29fa9c00..27fbd7df288 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
   defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
 }
-defm : CompareZeroFP<LTEBRCompare, FP32>;
-defm : CompareZeroFP<LTDBRCompare, FP64>;
-defm : CompareZeroFP<LTXBRCompare, FP128>;
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
+let Predicates = [FeatureNoVector] in {
+  defm : CompareZeroFP<LTEBRCompare, FP32>;
+  defm : CompareZeroFP<LTDBRCompare, FP64>;
+  defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
 
 // Moves between 64-bit integer and floating-point registers.
 def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
   defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
   defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
 
+  // For z13 we prefer LDE over LE to avoid partial register dependencies.
+  def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
   // These instructions are split after register allocation, so we don't
   // want a custom inserter.
   let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index dc9dfa801fd..71eb9986499 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2151,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
 
 // A floating-point load-and test operation. Create both a normal unary
 // operation and one that acts as a comparison against zero.
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
 multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
                           RegisterOperand cls> {
   def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
-  let isCodeGenOnly = 1 in
+  let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
     def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
 }
 
@@ -2401,6 +2404,23 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern>
 class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
   : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
 
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+  : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+          [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+                    AddressingMode mode>
+  : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+          [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+                    AddressingMode mode>
+  : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+          [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
 // An alias of a BinaryRI, but with different register sizes.
 class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
                     Immediate imm>
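As a concrete illustration of what restricting CompareZeroFP (and the LoadAndTestRRE compare variant) to FeatureNoVector means in practice, consider the small IR sample below; the function name is illustrative only and is not part of the patch. A compare against zero like this is still expected to use ltdbr on z10, while on z13 the backend materializes zero and uses an ordinary compare instead, which is exactly what the updated f8 in fp-cmp-02.ll further down checks (lzdr %f1 followed by cdbr %f0, %f1).

define i64 @cmp_zero_sample(i64 %a, i64 %b, double %f) {
  %cond = fcmp oeq double %f, 0.0
  %res = select i1 %cond, i64 %a, i64 %b
  ret i64 %res
}
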
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 63101a9d000..8dbd9df32b7 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -578,6 +578,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opcode = SystemZ::LDR;
   else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LXR;
+  else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+    Opcode = SystemZ::VLR32;
+  else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+    Opcode = SystemZ::VLR64;
   else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::VLR;
   else
@@ -1118,6 +1122,12 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
   } else if (RC == &SystemZ::FP128BitRegClass) {
     LoadOpcode = SystemZ::LX;
     StoreOpcode = SystemZ::STX;
+  } else if (RC == &SystemZ::VR32BitRegClass) {
+    LoadOpcode = SystemZ::VL32;
+    StoreOpcode = SystemZ::VST32;
+  } else if (RC == &SystemZ::VR64BitRegClass) {
+    LoadOpcode = SystemZ::VL64;
+    StoreOpcode = SystemZ::VST64;
   } else if (RC == &SystemZ::VF128BitRegClass ||
              RC == &SystemZ::VR128BitRegClass) {
     LoadOpcode = SystemZ::VL;
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index b6c8042b3c8..8abaeb69a20 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -14,6 +14,8 @@ let Predicates = [FeatureVector] in {
 
   // Register move.
   def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+  def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+  def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
 
   // Load GR from VR element.
   def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
@@ -123,6 +125,13 @@ let Predicates = [FeatureVector] in {
   def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
             (VLREPG bdxaddr12only:$addr)>;
 
+  // Use VLREP to load subvectors.  These patterns use "12pair" because
+  // LEY and LDY offer full 20-bit displacement fields.  It's often better
+  // to use those instructions rather than force a 20-bit displacement
+  // into a GPR temporary.
+  def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+  def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
   // Load logical element and zero.
   def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
   def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
@@ -193,6 +202,13 @@ let Predicates = [FeatureVector] in {
                         imm32zx1:$index),
             (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
 
+  // Use VSTE to store subvectors.  These patterns use "12pair" because
+  // STEY and STDY offer full 20-bit displacement fields.  It's often better
+  // to use those instructions rather than force a 20-bit displacement
+  // into a GPR temporary.
+  def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+  def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
   // Scatter element.
   def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
   def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
@@ -778,7 +794,7 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
 let Predicates = [FeatureVector] in {
   // Add.
   def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
-  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
+  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
 
   // Convert from fixed 64-bit.
   def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
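To see why WFADB and the other W-form defs below now carry real selection patterns instead of null_frag, consider a scalar operation whose operands start out in full vector registers; the function name here is illustrative, and the same shape is added as f6 of vec-add-01.ll later in this patch. Registers such as %v24 and %v26 have no 4-bit FPR encoding, so the classic adbr cannot be used, and the expected output is a single wfadb %f0, %v24, %v26.

define double @fadd_from_vectors(<2 x double> %val1, <2 x double> %val2) {
  %scalar1 = extractelement <2 x double> %val1, i32 0
  %scalar2 = extractelement <2 x double> %val2, i32 0
  %sum = fadd double %scalar1, %scalar2
  ret double %sum
}
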
@@ -804,53 +820,55 @@ let Predicates = [FeatureVector] in {
 
   // Divide.
   def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
-  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
+  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
 
   // Load FP integer.
   def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
   def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
   defm : VectorRounding<VFIDB, v128db>;
+  defm : VectorRounding<WFIDB, v64db>;
 
   // Load lengthened.
   def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
-  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, null_frag, v64db, v32eb, 2, 8>;
+  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
 
   // Load rounded,
   def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
   def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
   def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+  def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
 
   // Multiply.
   def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
-  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
+  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
 
   // Multiply and add.
   def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
-  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
+  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
 
   // Multiply and subtract.
   def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
-  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
+  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
 
   // Load complement,
   def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
-  def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
+  def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
 
   // Load negative.
   def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
-  def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
+  def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
 
   // Load positive.
   def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
-  def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
+  def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
 
   // Square root.
   def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
-  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
+  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
 
   // Subtract.
   def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
-  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
+  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
 
   // Test data class immediate.
   let Defs = [CC] in {
@@ -866,7 +884,7 @@ let Predicates = [FeatureVector] in {
 
   // Compare scalar.
   let Defs = [CC] in
-    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, null_frag, v64db, 3>;
+    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
 
   // Compare and signal scalar.
   let Defs = [CC] in
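Before the SystemZShortenInst.cpp changes that follow, here is a sketch of the conversion case they handle; the function name is illustrative and it mirrors f1 of fp-conv-01.ll below. On z13, instruction selection now produces WLEDB for the truncation; when both registers receive 4-bit encodings, shortenFPConv is expected to rewrite it to LEDBRA, swapping the operand order from (destination, source, exact-suppress, rounding-mode) to (destination, rounding-mode, source, exact-suppress), hence the ledbra %f0, 0, %f2, 0 that the test checks.

define float @trunc_sample(double %d1, double %d2) {
  %res = fptrunc double %d2 to float
  ret float %res
}
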
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index ec7a8c40d18..d1a17c5500d 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -15,6 +15,7 @@
 
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 
 using namespace llvm;
 
@@ -36,6 +37,10 @@ public:
 private:
   bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
                   unsigned LLIxL, unsigned LLIxH);
+  bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+  bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+  bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+  bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
 
   const SystemZInstrInfo *TII;
 
@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
   return false;
 }
 
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+      SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode.  If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MachineOperand Dest(MI.getOperand(0));
+    MachineOperand Src(MI.getOperand(1));
+    MachineOperand Suppress(MI.getOperand(2));
+    MachineOperand Mode(MI.getOperand(3));
+    MI.RemoveOperand(3);
+    MI.RemoveOperand(2);
+    MI.RemoveOperand(1);
+    MI.RemoveOperand(0);
+    MI.setDesc(TII->get(Opcode));
+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+      .addOperand(Dest)
+      .addOperand(Mode)
+      .addOperand(Src)
+      .addOperand(Suppress);
+    return true;
+  }
+  return false;
+}
+
 // Process all instructions in MBB. Return true if something changed.
 bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
 
   // Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; - unsigned Opcode = MI.getOpcode(); - if (Opcode == SystemZ::IILF) + switch (MI.getOpcode()) { + case SystemZ::IILF: Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, SystemZ::LLILH); - else if (Opcode == SystemZ::IIHF) + break; + + case SystemZ::IIHF: Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, SystemZ::LLIHH); + break; + + case SystemZ::WFADB: + Changed |= shortenOn001(MI, SystemZ::ADBR); + break; + + case SystemZ::WFDDB: + Changed |= shortenOn001(MI, SystemZ::DDBR); + break; + + case SystemZ::WFIDB: + Changed |= shortenFPConv(MI, SystemZ::FIDBRA); + break; + + case SystemZ::WLDEB: + Changed |= shortenOn01(MI, SystemZ::LDEBR); + break; + + case SystemZ::WLEDB: + Changed |= shortenFPConv(MI, SystemZ::LEDBRA); + break; + + case SystemZ::WFMDB: + Changed |= shortenOn001(MI, SystemZ::MDBR); + break; + + case SystemZ::WFLCDB: + Changed |= shortenOn01(MI, SystemZ::LCDBR); + break; + + case SystemZ::WFLNDB: + Changed |= shortenOn01(MI, SystemZ::LNDBR); + break; + + case SystemZ::WFLPDB: + Changed |= shortenOn01(MI, SystemZ::LPDBR); + break; + + case SystemZ::WFSQDB: + Changed |= shortenOn01(MI, SystemZ::SQDBR); + break; + + case SystemZ::WFSDB: + Changed |= shortenOn001(MI, SystemZ::SDBR); + break; + + case SystemZ::WFCDB: + Changed |= shortenOn01(MI, SystemZ::CDBR); + break; + + case SystemZ::VL32: + // For z13 we prefer LDE over LE to avoid partial register dependencies. + Changed |= shortenOn0(MI, SystemZ::LDE32); + break; + + case SystemZ::VST32: + Changed |= shortenOn0(MI, SystemZ::STE); + break; + + case SystemZ::VL64: + Changed |= shortenOn0(MI, SystemZ::LD); + break; + + case SystemZ::VST64: + Changed |= shortenOn0(MI, SystemZ::STD); + break; + } + unsigned UsedLow = 0; unsigned UsedHigh = 0; for (auto MOI = MI.operands_begin(), MOE = MI.operands_end(); diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll index d14a92acae8..3b143d93315 100644 --- a/test/CodeGen/SystemZ/fp-abs-01.ll +++ b/test/CodeGen/SystemZ/fp-abs-01.ll @@ -1,6 +1,7 @@ ; Test floating-point absolute. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test f32. declare float @llvm.fabs.f32(float %f) diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll index deec8c32b4a..e831ddb86fe 100644 --- a/test/CodeGen/SystemZ/fp-abs-02.ll +++ b/test/CodeGen/SystemZ/fp-abs-02.ll @@ -1,6 +1,7 @@ ; Test negated floating-point absolute. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test f32. declare float @llvm.fabs.f32(float %f) diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll index 07c7462020f..5be1ad79d45 100644 --- a/test/CodeGen/SystemZ/fp-add-02.ll +++ b/test/CodeGen/SystemZ/fp-add-02.ll @@ -1,7 +1,8 @@ ; Test 64-bit floating-point addition. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s - +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s declare double @foo() ; Check register addition. 
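A note on the test updates that begin here: both RUN lines share the bare CHECK prefix, so every unprefixed check must still hold for the z13 build; only checks tied to the scalar memory forms, such as the adb %f0, 160(%r15) in f7 in the next hunk, are narrowed to CHECK-SCALAR, presumably because with f64 values living in vector registers the reload is no longer guaranteed to be folded into adb. For a plain register-register case (hypothetical function below, equivalent to the register addition this file checks), both builds are still expected to end up with adbr: z10 selects it directly, while z13 selects wfadb and relies on SystemZShortenInst to shrink it once both operands sit in %f0-%f15.

define double @fadd_sample(double %f1, double %f2) {
  %sum = fadd double %f1, %f2
  ret double %sum
}
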
@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%base, i64 %index) { define double @f7(double *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: adb %f0, 160(%r15) +; CHECK-SCALAR: adb %f0, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr double, double *%ptr0, i64 2 %ptr2 = getelementptr double, double *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll index 95af309e795..94a256777c7 100644 --- a/test/CodeGen/SystemZ/fp-cmp-02.ll +++ b/test/CodeGen/SystemZ/fp-cmp-02.ll @@ -1,7 +1,10 @@ ; Test 64-bit floating-point comparison. The tests assume a z10 implementation ; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare double @foo() @@ -9,8 +12,9 @@ declare double @foo() define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { ; CHECK-LABEL: f1: ; CHECK: cdbr %f0, %f2 -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq double %f1, %f2 %res = select i1 %cond, i64 %a, i64 %b @@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { ; CHECK-LABEL: f2: ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %f2 = load double , double *%ptr %cond = fcmp oeq double %f1, %f2 @@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { ; CHECK-LABEL: f3: ; CHECK: cdb %f0, 4088(%r4) -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double, double *%base, i64 511 %f2 = load double , double *%ptr @@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) { ; CHECK-LABEL: f4: ; CHECK: aghi %r4, 4096 ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double, double *%base, i64 512 %f2 = load double , double *%ptr @@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) { ; CHECK-LABEL: f5: ; CHECK: aghi %r4, -8 ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double, double *%base, i64 -1 %f2 = load double , double *%ptr @@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r5, 3 ; CHECK: cdb %f0, 800(%r1,%r4) -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr1 = getelementptr double, double *%base, i64 %index %ptr2 = getelementptr double, double *%ptr1, i64 100 @@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) { define double @f7(double *%ptr0) { ; CHECK-LABEL: f7: ; 
CHECK: brasl %r14, foo@PLT -; CHECK: cdb {{%f[0-9]+}}, 160(%r15) +; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr double, double *%ptr0, i64 2 %ptr2 = getelementptr double, double *%ptr0, i64 4 @@ -152,9 +161,12 @@ define double @f7(double *%ptr0) { ; Check comparison with zero. define i64 @f8(i64 %a, i64 %b, double %f) { ; CHECK-LABEL: f8: -; CHECK: ltdbr %f0, %f0 -; CHECK-NEXT: je -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR: ltdbr %f0, %f0 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR: lzdr %f1 +; CHECK-VECTOR-NEXT: cdbr %f0, %f1 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq double %f, 0.0 %res = select i1 %cond, i64 %a, i64 %b @@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f) { define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) { ; CHECK-LABEL: f9: ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: jl {{\.L.*}} -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: jl +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 ; CHECK: br %r14 %f1 = load double , double *%ptr %cond = fcmp ogt double %f1, %f2 diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll index ebc174afada..06740ed4b4a 100644 --- a/test/CodeGen/SystemZ/fp-conv-01.ll +++ b/test/CodeGen/SystemZ/fp-conv-01.ll @@ -1,11 +1,15 @@ ; Test floating-point truncations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s ; Test f64->f32. define float @f1(double %d1, double %d2) { ; CHECK-LABEL: f1: -; CHECK: ledbr %f0, %f2 +; CHECK-SCALAR: ledbr %f0, %f2 +; CHECK-VECTOR: ledbra %f0, 0, %f2, 0 ; CHECK: br %r14 %res = fptrunc double %d2 to float ret float %res @@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) { define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) { ; CHECK-LABEL: f5: ; CHECK: ldxbr %f1, %f1 -; CHECK: adbr %f1, %f2 -; CHECK: std %f1, 0(%r2) +; CHECK-SCALAR: adbr %f1, %f2 +; CHECK-SCALAR: std %f1, 0(%r2) +; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2 +; CHECK-VECTOR: std [[REG]], 0(%r2) ; CHECK: br %r14 %val = load fp128 , fp128 *%ptr %conv = fptrunc fp128 %val to double diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll index e9376ba6973..be32bfe7ba9 100644 --- a/test/CodeGen/SystemZ/fp-conv-02.ll +++ b/test/CodeGen/SystemZ/fp-conv-02.ll @@ -1,6 +1,8 @@ ; Test extensions of f32 to f64. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Check register extension. define double @f1(float %val) { @@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %index) { ; to use LDEB if possible. define void @f7(double *%ptr1, float *%ptr2) { ; CHECK-LABEL: f7: -; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) ; CHECK: br %r14 %val0 = load volatile float , float *%ptr2 %val1 = load volatile float , float *%ptr2 diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll index 82eeb480602..f120e7c923d 100644 --- a/test/CodeGen/SystemZ/fp-div-02.ll +++ b/test/CodeGen/SystemZ/fp-div-02.ll @@ -1,6 +1,8 @@ ; Test 64-bit floating-point division. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s declare double @foo() @@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) { define double @f7(double *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: ddb %f0, 160(%r15) +; CHECK-SCALAR: ddb %f0, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr double, double *%ptr0, i64 2 %ptr2 = getelementptr double, double *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll index 31a8fc55d77..843b1b6a6e6 100644 --- a/test/CodeGen/SystemZ/fp-move-01.ll +++ b/test/CodeGen/SystemZ/fp-move-01.ll @@ -1,11 +1,13 @@ ; Test moves between FPRs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test f32 moves. define float @f1(float %a, float %b) { ; CHECK-LABEL: f1: ; CHECK: ler %f0, %f2 +; CHECK: br %r14 ret float %b } @@ -13,6 +15,7 @@ define float @f1(float %a, float %b) { define double @f2(double %a, double %b) { ; CHECK-LABEL: f2: ; CHECK: ldr %f0, %f2 +; CHECK: br %r14 ret double %b } @@ -22,6 +25,7 @@ define void @f3(fp128 *%x) { ; CHECK-LABEL: f3: ; CHECK: lxr ; CHECK: axbr +; CHECK: br %r14 %val = load volatile fp128 , fp128 *%x %sum = fadd fp128 %val, %val store volatile fp128 %sum, fp128 *%x diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll index d3728d0e585..6650419b2c3 100644 --- a/test/CodeGen/SystemZ/fp-move-04.ll +++ b/test/CodeGen/SystemZ/fp-move-04.ll @@ -1,6 +1,7 @@ ; Test 64-bit floating-point loads. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test the low end of the LD range. define double @f1(double *%src) { diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll index c3ad2a59f66..5361002a97e 100644 --- a/test/CodeGen/SystemZ/fp-move-07.ll +++ b/test/CodeGen/SystemZ/fp-move-07.ll @@ -1,6 +1,7 @@ ; Test 64-bit floating-point stores. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test the low end of the STD range. define void @f1(double *%src, double %val) { diff --git a/test/CodeGen/SystemZ/fp-move-11.ll b/test/CodeGen/SystemZ/fp-move-11.ll new file mode 100644 index 00000000000..ce45019425c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-11.ll @@ -0,0 +1,110 @@ +; Test 32-bit floating-point loads for z13. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test that we use LDE instead of LE - low end of the LE range. +define float @f1(float *%src) { +; CHECK-LABEL: f1: +; CHECK: lde %f0, 0(%r2) +; CHECK: br %r14 + %val = load float, float *%src + ret float %val +} + +; Test that we use LDE instead of LE - high end of the LE range. +define float @f2(float *%src) { +; CHECK-LABEL: f2: +; CHECK: lde %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 1023 + %val = load float, float *%ptr + ret float %val +} + +; Check the next word up, which should use LEY instead of LDE. 
+define float @f3(float *%src) { +; CHECK-LABEL: f3: +; CHECK: ley %f0, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 1024 + %val = load float, float *%ptr + ret float %val +} + +; Check the high end of the aligned LEY range. +define float @f4(float *%src) { +; CHECK-LABEL: f4: +; CHECK: ley %f0, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 131071 + %val = load float, float *%ptr + ret float %val +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f5(float *%src) { +; CHECK-LABEL: f5: +; CHECK: agfi %r2, 524288 +; CHECK: lde %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 131072 + %val = load float, float *%ptr + ret float %val +} + +; Check the high end of the negative aligned LEY range. +define float @f6(float *%src) { +; CHECK-LABEL: f6: +; CHECK: ley %f0, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 -1 + %val = load float, float *%ptr + ret float %val +} + +; Check the low end of the LEY range. +define float @f7(float *%src) { +; CHECK-LABEL: f7: +; CHECK: ley %f0, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 -131072 + %val = load float, float *%ptr + ret float %val +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f8(float *%src) { +; CHECK-LABEL: f8: +; CHECK: agfi %r2, -524292 +; CHECK: lde %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%src, i64 -131073 + %val = load float, float *%ptr + ret float %val +} + +; Check that LDE allows an index. +define float @f9(i64 %src, i64 %index) { +; CHECK-LABEL: f9: +; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to float * + %val = load float, float *%ptr + ret float %val +} + +; Check that LEY allows an index. +define float @f10(i64 %src, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to float * + %val = load float, float *%ptr + ret float %val +} diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll index 701304ef3ee..0d52121f41c 100644 --- a/test/CodeGen/SystemZ/fp-mul-03.ll +++ b/test/CodeGen/SystemZ/fp-mul-03.ll @@ -1,6 +1,8 @@ ; Test multiplication of two f64s, producing an f64 result. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s declare double @foo() @@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) { define double @f7(double *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: mdb %f0, 160(%r15) +; CHECK-SCALAR: mdb %f0, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr double, double *%ptr0, i64 2 %ptr2 = getelementptr double, double *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll b/test/CodeGen/SystemZ/fp-mul-07.ll index b1d0ae3c520..e0b4a5c5d78 100644 --- a/test/CodeGen/SystemZ/fp-mul-07.ll +++ b/test/CodeGen/SystemZ/fp-mul-07.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare double @llvm.fma.f64(double %f1, double %f2, double %f3) define double @f1(double %f1, double %f2, double %acc) { ; CHECK-LABEL: f1: -; CHECK: madbr %f4, %f0, %f2 -; CHECK: ldr %f0, %f4 +; CHECK-SCALAR: madbr %f4, %f0, %f2 +; CHECK-SCALAR: ldr %f0, %f4 +; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) ret double %res diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll index f2eadf55ff3..927a8064823 100644 --- a/test/CodeGen/SystemZ/fp-mul-09.ll +++ b/test/CodeGen/SystemZ/fp-mul-09.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare double @llvm.fma.f64(double %f1, double %f2, double %f3) define double @f1(double %f1, double %f2, double %acc) { ; CHECK-LABEL: f1: -; CHECK: msdbr %f4, %f0, %f2 -; CHECK: ldr %f0, %f4 +; CHECK-SCALAR: msdbr %f4, %f0, %f2 +; CHECK-SCALAR: ldr %f0, %f4 +; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %negacc = fsub double -0.0, %acc %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll index 927bcd44d02..fe2e5f67cf5 100644 --- a/test/CodeGen/SystemZ/fp-neg-01.ll +++ b/test/CodeGen/SystemZ/fp-neg-01.ll @@ -1,6 +1,7 @@ ; Test floating-point negation. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test f32. define float @f1(float %f) { diff --git a/test/CodeGen/SystemZ/fp-round-02.ll b/test/CodeGen/SystemZ/fp-round-02.ll index bd5419dad1d..428261478dc 100644 --- a/test/CodeGen/SystemZ/fp-round-02.ll +++ b/test/CodeGen/SystemZ/fp-round-02.ll @@ -1,6 +1,9 @@ ; Test rounding functions for z196 and above. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s ; Test rint for f32. declare float @llvm.rint.f32(float %f) @@ -16,7 +19,8 @@ define float @f1(float %f) { declare double @llvm.rint.f64(double %f) define double @f2(double %f) { ; CHECK-LABEL: f2: -; CHECK: fidbr %f0, 0, %f0 +; CHECK-SCALAR: fidbr %f0, 0, %f0 +; CHECK-VECTOR: fidbra %f0, 0, %f0, 0 ; CHECK: br %r14 %res = call double @llvm.rint.f64(double %f) ret double %res diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll index a6d987b0d76..a162466064e 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-02.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll @@ -1,6 +1,8 @@ ; Test 64-bit square root. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s declare double @llvm.sqrt.f64(double %f) declare double @sqrt(double) @@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %index) { ; to use SQDB if possible. define void @f7(double *%ptr) { ; CHECK-LABEL: f7: -; CHECK: sqdb {{%f[0-9]+}}, 160(%r15) +; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15) ; CHECK: br %r14 %val0 = load volatile double , double *%ptr %val1 = load volatile double , double *%ptr diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll index f59ec0a31d7..143baac23e1 100644 --- a/test/CodeGen/SystemZ/fp-sub-02.ll +++ b/test/CodeGen/SystemZ/fp-sub-02.ll @@ -1,6 +1,8 @@ ; Test 64-bit floating-point subtraction. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s declare double @foo() @@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) { define double @f7(double *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: sdb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr double, double *%ptr0, i64 2 %ptr2 = getelementptr double, double *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll index 029c6d6d37d..21b8fdb0d67 100644 --- a/test/CodeGen/SystemZ/frame-03.ll +++ b/test/CodeGen/SystemZ/frame-03.ll @@ -2,7 +2,7 @@ ; uses a different register class, but the set of saved and restored ; registers should be the same. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll index 253bbc26c1f..dd810142962 100644 --- a/test/CodeGen/SystemZ/frame-07.ll +++ b/test/CodeGen/SystemZ/frame-07.ll @@ -1,7 +1,7 @@ ; Test the saving and restoring of FPRs in large frames. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s ; Test a frame size that requires some FPRs to be saved and loaded using ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD. diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll index 485297a2b21..502e541bafc 100644 --- a/test/CodeGen/SystemZ/frame-17.ll +++ b/test/CodeGen/SystemZ/frame-17.ll @@ -1,6 +1,6 @@ ; Test spilling of FPRs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; We need to save and restore 8 of the 16 FPRs and allocate an additional ; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly diff --git a/test/CodeGen/SystemZ/frame-20.ll b/test/CodeGen/SystemZ/frame-20.ll new file mode 100644 index 00000000000..8d601c6f6d5 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-20.ll @@ -0,0 +1,445 @@ +; Like frame-03.ll, but for z13. In this case we have 16 more registers +; available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; This function should require all FPRs, but no other spill slots. +; We need to save and restore 8 of the 16 FPRs, so the frame size +; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 +; (the caller-allocated part of the frame) + 224. +define void @f1(double *%ptr) { +; CHECK-LABEL: f1: +; CHECK: aghi %r15, -224 +; CHECK: .cfi_def_cfa_offset 384 +; CHECK: std %f8, 216(%r15) +; CHECK: std %f9, 208(%r15) +; CHECK: std %f10, 200(%r15) +; CHECK: std %f11, 192(%r15) +; CHECK: std %f12, 184(%r15) +; CHECK: std %f13, 176(%r15) +; CHECK: std %f14, 168(%r15) +; CHECK: std %f15, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK: .cfi_offset %f15, -224 +; CHECK-DAG: ld %f0, 0(%r2) +; CHECK-DAG: ld %f7, 0(%r2) +; CHECK-DAG: ld %f8, 0(%r2) +; CHECK-DAG: ld %f15, 0(%r2) +; CHECK-DAG: vlrepg %v16, 0(%r2) +; CHECK-DAG: vlrepg %v23, 0(%r2) +; CHECK-DAG: vlrepg %v24, 0(%r2) +; CHECK-DAG: vlrepg %v31, 0(%r2) +; CHECK: ld %f8, 216(%r15) +; CHECK: ld %f9, 208(%r15) +; CHECK: ld %f10, 200(%r15) +; CHECK: ld %f11, 192(%r15) +; CHECK: ld %f12, 184(%r15) +; CHECK: ld %f13, 176(%r15) +; CHECK: ld %f14, 168(%r15) +; CHECK: ld %f15, 160(%r15) +; CHECK: aghi %r15, 224 +; CHECK: br %r14 + %l0 = load volatile double, double *%ptr + %l1 = load volatile double, double *%ptr + %l2 = load volatile double, double *%ptr + %l3 = load volatile double, double *%ptr + %l4 = load volatile double, double *%ptr + %l5 = load volatile double, double *%ptr + %l6 = load volatile double, double *%ptr + %l7 = load volatile double, double *%ptr + %l8 = load volatile double, double *%ptr + %l9 = load volatile double, double *%ptr + %l10 = load volatile double, double *%ptr + %l11 = load volatile double, double *%ptr + %l12 = load volatile double, double *%ptr + %l13 = load volatile double, double *%ptr + %l14 = load volatile double, double *%ptr + %l15 = load volatile double, double *%ptr + %l16 = load volatile double, double *%ptr + %l17 = load volatile double, 
double *%ptr + %l18 = load volatile double, double *%ptr + %l19 = load volatile double, double *%ptr + %l20 = load volatile double, double *%ptr + %l21 = load volatile double, double *%ptr + %l22 = load volatile double, double *%ptr + %l23 = load volatile double, double *%ptr + %l24 = load volatile double, double *%ptr + %l25 = load volatile double, double *%ptr + %l26 = load volatile double, double *%ptr + %l27 = load volatile double, double *%ptr + %l28 = load volatile double, double *%ptr + %l29 = load volatile double, double *%ptr + %l30 = load volatile double, double *%ptr + %l31 = load volatile double, double *%ptr + %acc0 = fsub double %l0, %l0 + %acc1 = fsub double %l1, %acc0 + %acc2 = fsub double %l2, %acc1 + %acc3 = fsub double %l3, %acc2 + %acc4 = fsub double %l4, %acc3 + %acc5 = fsub double %l5, %acc4 + %acc6 = fsub double %l6, %acc5 + %acc7 = fsub double %l7, %acc6 + %acc8 = fsub double %l8, %acc7 + %acc9 = fsub double %l9, %acc8 + %acc10 = fsub double %l10, %acc9 + %acc11 = fsub double %l11, %acc10 + %acc12 = fsub double %l12, %acc11 + %acc13 = fsub double %l13, %acc12 + %acc14 = fsub double %l14, %acc13 + %acc15 = fsub double %l15, %acc14 + %acc16 = fsub double %l16, %acc15 + %acc17 = fsub double %l17, %acc16 + %acc18 = fsub double %l18, %acc17 + %acc19 = fsub double %l19, %acc18 + %acc20 = fsub double %l20, %acc19 + %acc21 = fsub double %l21, %acc20 + %acc22 = fsub double %l22, %acc21 + %acc23 = fsub double %l23, %acc22 + %acc24 = fsub double %l24, %acc23 + %acc25 = fsub double %l25, %acc24 + %acc26 = fsub double %l26, %acc25 + %acc27 = fsub double %l27, %acc26 + %acc28 = fsub double %l28, %acc27 + %acc29 = fsub double %l29, %acc28 + %acc30 = fsub double %l30, %acc29 + %acc31 = fsub double %l31, %acc30 + store volatile double %acc0, double *%ptr + store volatile double %acc1, double *%ptr + store volatile double %acc2, double *%ptr + store volatile double %acc3, double *%ptr + store volatile double %acc4, double *%ptr + store volatile double %acc5, double *%ptr + store volatile double %acc6, double *%ptr + store volatile double %acc7, double *%ptr + store volatile double %acc8, double *%ptr + store volatile double %acc9, double *%ptr + store volatile double %acc10, double *%ptr + store volatile double %acc11, double *%ptr + store volatile double %acc12, double *%ptr + store volatile double %acc13, double *%ptr + store volatile double %acc14, double *%ptr + store volatile double %acc15, double *%ptr + store volatile double %acc16, double *%ptr + store volatile double %acc17, double *%ptr + store volatile double %acc18, double *%ptr + store volatile double %acc19, double *%ptr + store volatile double %acc20, double *%ptr + store volatile double %acc21, double *%ptr + store volatile double %acc22, double *%ptr + store volatile double %acc23, double *%ptr + store volatile double %acc24, double *%ptr + store volatile double %acc25, double *%ptr + store volatile double %acc26, double *%ptr + store volatile double %acc27, double *%ptr + store volatile double %acc28, double *%ptr + store volatile double %acc29, double *%ptr + store volatile double %acc30, double *%ptr + store volatile double %acc31, double *%ptr + ret void +} + +; Like f1, but requires one fewer FPR. We allocate in numerical order, +; so %f15 is the one that gets dropped. 
+define void @f2(double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: aghi %r15, -216 +; CHECK: .cfi_def_cfa_offset 376 +; CHECK: std %f8, 208(%r15) +; CHECK: std %f9, 200(%r15) +; CHECK: std %f10, 192(%r15) +; CHECK: std %f11, 184(%r15) +; CHECK: std %f12, 176(%r15) +; CHECK: std %f13, 168(%r15) +; CHECK: std %f14, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK-NOT: %v15 +; CHECK-NOT: %f15 +; CHECK: ld %f8, 208(%r15) +; CHECK: ld %f9, 200(%r15) +; CHECK: ld %f10, 192(%r15) +; CHECK: ld %f11, 184(%r15) +; CHECK: ld %f12, 176(%r15) +; CHECK: ld %f13, 168(%r15) +; CHECK: ld %f14, 160(%r15) +; CHECK: aghi %r15, 216 +; CHECK: br %r14 + %l0 = load volatile double, double *%ptr + %l1 = load volatile double, double *%ptr + %l2 = load volatile double, double *%ptr + %l3 = load volatile double, double *%ptr + %l4 = load volatile double, double *%ptr + %l5 = load volatile double, double *%ptr + %l6 = load volatile double, double *%ptr + %l7 = load volatile double, double *%ptr + %l8 = load volatile double, double *%ptr + %l9 = load volatile double, double *%ptr + %l10 = load volatile double, double *%ptr + %l11 = load volatile double, double *%ptr + %l12 = load volatile double, double *%ptr + %l13 = load volatile double, double *%ptr + %l14 = load volatile double, double *%ptr + %l16 = load volatile double, double *%ptr + %l17 = load volatile double, double *%ptr + %l18 = load volatile double, double *%ptr + %l19 = load volatile double, double *%ptr + %l20 = load volatile double, double *%ptr + %l21 = load volatile double, double *%ptr + %l22 = load volatile double, double *%ptr + %l23 = load volatile double, double *%ptr + %l24 = load volatile double, double *%ptr + %l25 = load volatile double, double *%ptr + %l26 = load volatile double, double *%ptr + %l27 = load volatile double, double *%ptr + %l28 = load volatile double, double *%ptr + %l29 = load volatile double, double *%ptr + %l30 = load volatile double, double *%ptr + %l31 = load volatile double, double *%ptr + %acc0 = fsub double %l0, %l0 + %acc1 = fsub double %l1, %acc0 + %acc2 = fsub double %l2, %acc1 + %acc3 = fsub double %l3, %acc2 + %acc4 = fsub double %l4, %acc3 + %acc5 = fsub double %l5, %acc4 + %acc6 = fsub double %l6, %acc5 + %acc7 = fsub double %l7, %acc6 + %acc8 = fsub double %l8, %acc7 + %acc9 = fsub double %l9, %acc8 + %acc10 = fsub double %l10, %acc9 + %acc11 = fsub double %l11, %acc10 + %acc12 = fsub double %l12, %acc11 + %acc13 = fsub double %l13, %acc12 + %acc14 = fsub double %l14, %acc13 + %acc16 = fsub double %l16, %acc14 + %acc17 = fsub double %l17, %acc16 + %acc18 = fsub double %l18, %acc17 + %acc19 = fsub double %l19, %acc18 + %acc20 = fsub double %l20, %acc19 + %acc21 = fsub double %l21, %acc20 + %acc22 = fsub double %l22, %acc21 + %acc23 = fsub double %l23, %acc22 + %acc24 = fsub double %l24, %acc23 + %acc25 = fsub double %l25, %acc24 + %acc26 = fsub double %l26, %acc25 + %acc27 = fsub double %l27, %acc26 + %acc28 = fsub double %l28, %acc27 + %acc29 = fsub double %l29, %acc28 + %acc30 = fsub double %l30, %acc29 + %acc31 = fsub double %l31, %acc30 + store volatile double %acc0, double *%ptr + store volatile double %acc1, double *%ptr + store volatile double %acc2, double *%ptr + store volatile double %acc3, double *%ptr + store volatile double %acc4, double *%ptr + store volatile double %acc5, double *%ptr + store volatile 
double %acc6, double *%ptr + store volatile double %acc7, double *%ptr + store volatile double %acc8, double *%ptr + store volatile double %acc9, double *%ptr + store volatile double %acc10, double *%ptr + store volatile double %acc11, double *%ptr + store volatile double %acc12, double *%ptr + store volatile double %acc13, double *%ptr + store volatile double %acc14, double *%ptr + store volatile double %acc16, double *%ptr + store volatile double %acc17, double *%ptr + store volatile double %acc18, double *%ptr + store volatile double %acc19, double *%ptr + store volatile double %acc20, double *%ptr + store volatile double %acc21, double *%ptr + store volatile double %acc22, double *%ptr + store volatile double %acc23, double *%ptr + store volatile double %acc24, double *%ptr + store volatile double %acc25, double *%ptr + store volatile double %acc26, double *%ptr + store volatile double %acc27, double *%ptr + store volatile double %acc28, double *%ptr + store volatile double %acc29, double *%ptr + store volatile double %acc30, double *%ptr + store volatile double %acc31, double *%ptr + ret void +} + +; Like f1, but should require only one call-saved FPR. +define void @f3(double *%ptr) { +; CHECK-LABEL: f3: +; CHECK: aghi %r15, -168 +; CHECK: .cfi_def_cfa_offset 328 +; CHECK: std %f8, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK-NOT: {{%[fv]9}} +; CHECK-NOT: {{%[fv]1[0-5]}} +; CHECK: ld %f8, 160(%r15) +; CHECK: aghi %r15, 168 +; CHECK: br %r14 + %l0 = load volatile double, double *%ptr + %l1 = load volatile double, double *%ptr + %l2 = load volatile double, double *%ptr + %l3 = load volatile double, double *%ptr + %l4 = load volatile double, double *%ptr + %l5 = load volatile double, double *%ptr + %l6 = load volatile double, double *%ptr + %l7 = load volatile double, double *%ptr + %l8 = load volatile double, double *%ptr + %l16 = load volatile double, double *%ptr + %l17 = load volatile double, double *%ptr + %l18 = load volatile double, double *%ptr + %l19 = load volatile double, double *%ptr + %l20 = load volatile double, double *%ptr + %l21 = load volatile double, double *%ptr + %l22 = load volatile double, double *%ptr + %l23 = load volatile double, double *%ptr + %l24 = load volatile double, double *%ptr + %l25 = load volatile double, double *%ptr + %l26 = load volatile double, double *%ptr + %l27 = load volatile double, double *%ptr + %l28 = load volatile double, double *%ptr + %l29 = load volatile double, double *%ptr + %l30 = load volatile double, double *%ptr + %l31 = load volatile double, double *%ptr + %acc0 = fsub double %l0, %l0 + %acc1 = fsub double %l1, %acc0 + %acc2 = fsub double %l2, %acc1 + %acc3 = fsub double %l3, %acc2 + %acc4 = fsub double %l4, %acc3 + %acc5 = fsub double %l5, %acc4 + %acc6 = fsub double %l6, %acc5 + %acc7 = fsub double %l7, %acc6 + %acc8 = fsub double %l8, %acc7 + %acc16 = fsub double %l16, %acc8 + %acc17 = fsub double %l17, %acc16 + %acc18 = fsub double %l18, %acc17 + %acc19 = fsub double %l19, %acc18 + %acc20 = fsub double %l20, %acc19 + %acc21 = fsub double %l21, %acc20 + %acc22 = fsub double %l22, %acc21 + %acc23 = fsub double %l23, %acc22 + %acc24 = fsub double %l24, %acc23 + %acc25 = fsub double %l25, %acc24 + %acc26 = fsub double %l26, %acc25 + %acc27 = fsub double %l27, %acc26 + %acc28 = fsub double %l28, %acc27 + %acc29 = fsub double %l29, %acc28 + %acc30 = fsub double %l30, %acc29 + %acc31 = fsub double %l31, %acc30 + store volatile double %acc0, double *%ptr + store volatile double %acc1, double *%ptr + store volatile double 
%acc2, double *%ptr + store volatile double %acc3, double *%ptr + store volatile double %acc4, double *%ptr + store volatile double %acc5, double *%ptr + store volatile double %acc6, double *%ptr + store volatile double %acc7, double *%ptr + store volatile double %acc8, double *%ptr + store volatile double %acc16, double *%ptr + store volatile double %acc17, double *%ptr + store volatile double %acc18, double *%ptr + store volatile double %acc19, double *%ptr + store volatile double %acc20, double *%ptr + store volatile double %acc21, double *%ptr + store volatile double %acc22, double *%ptr + store volatile double %acc23, double *%ptr + store volatile double %acc24, double *%ptr + store volatile double %acc25, double *%ptr + store volatile double %acc26, double *%ptr + store volatile double %acc27, double *%ptr + store volatile double %acc28, double *%ptr + store volatile double %acc29, double *%ptr + store volatile double %acc30, double *%ptr + store volatile double %acc31, double *%ptr + ret void +} + +; This function should use all call-clobbered FPRs and vector registers +; but no call-saved ones. It shouldn't need to create a frame. +define void @f4(double *%ptr) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: {{%[fv][89]}} +; CHECK-NOT: {{%[fv]1[0-5]}} +; CHECK: br %r14 + %l0 = load volatile double, double *%ptr + %l1 = load volatile double, double *%ptr + %l2 = load volatile double, double *%ptr + %l3 = load volatile double, double *%ptr + %l4 = load volatile double, double *%ptr + %l5 = load volatile double, double *%ptr + %l6 = load volatile double, double *%ptr + %l7 = load volatile double, double *%ptr + %l16 = load volatile double, double *%ptr + %l17 = load volatile double, double *%ptr + %l18 = load volatile double, double *%ptr + %l19 = load volatile double, double *%ptr + %l20 = load volatile double, double *%ptr + %l21 = load volatile double, double *%ptr + %l22 = load volatile double, double *%ptr + %l23 = load volatile double, double *%ptr + %l24 = load volatile double, double *%ptr + %l25 = load volatile double, double *%ptr + %l26 = load volatile double, double *%ptr + %l27 = load volatile double, double *%ptr + %l28 = load volatile double, double *%ptr + %l29 = load volatile double, double *%ptr + %l30 = load volatile double, double *%ptr + %l31 = load volatile double, double *%ptr + %acc0 = fsub double %l0, %l0 + %acc1 = fsub double %l1, %acc0 + %acc2 = fsub double %l2, %acc1 + %acc3 = fsub double %l3, %acc2 + %acc4 = fsub double %l4, %acc3 + %acc5 = fsub double %l5, %acc4 + %acc6 = fsub double %l6, %acc5 + %acc7 = fsub double %l7, %acc6 + %acc16 = fsub double %l16, %acc7 + %acc17 = fsub double %l17, %acc16 + %acc18 = fsub double %l18, %acc17 + %acc19 = fsub double %l19, %acc18 + %acc20 = fsub double %l20, %acc19 + %acc21 = fsub double %l21, %acc20 + %acc22 = fsub double %l22, %acc21 + %acc23 = fsub double %l23, %acc22 + %acc24 = fsub double %l24, %acc23 + %acc25 = fsub double %l25, %acc24 + %acc26 = fsub double %l26, %acc25 + %acc27 = fsub double %l27, %acc26 + %acc28 = fsub double %l28, %acc27 + %acc29 = fsub double %l29, %acc28 + %acc30 = fsub double %l30, %acc29 + %acc31 = fsub double %l31, %acc30 + store volatile double %acc0, double *%ptr + store volatile double %acc1, double *%ptr + store volatile double %acc2, double *%ptr + store volatile double %acc3, double *%ptr + store volatile double %acc4, double *%ptr + store volatile double %acc5, double *%ptr + store volatile double %acc6, double *%ptr + store volatile double %acc7, double *%ptr + store 
+  store volatile double %acc16, double *%ptr
+  store volatile double %acc17, double *%ptr
+  store volatile double %acc18, double *%ptr
+  store volatile double %acc19, double *%ptr
+  store volatile double %acc20, double *%ptr
+  store volatile double %acc21, double *%ptr
+  store volatile double %acc22, double *%ptr
+  store volatile double %acc23, double *%ptr
+  store volatile double %acc24, double *%ptr
+  store volatile double %acc25, double *%ptr
+  store volatile double %acc26, double *%ptr
+  store volatile double %acc27, double *%ptr
+  store volatile double %acc28, double *%ptr
+  store volatile double %acc29, double *%ptr
+  store volatile double %acc30, double *%ptr
+  store volatile double %acc31, double *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-abs-05.ll b/test/CodeGen/SystemZ/vec-abs-05.ll
index 89142b21854..63210f87b94 100644
--- a/test/CodeGen/SystemZ/vec-abs-05.ll
+++ b/test/CodeGen/SystemZ/vec-abs-05.ll
@@ -1,7 +1,8 @@
-; Test v2f64 absolute.
+; Test f64 and v2f64 absolute.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.fabs.f64(double)
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
 
 ; Test a plain absolute.
@@ -22,3 +23,24 @@ define <2 x double> @f2(<2 x double> %val) {
   %ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
   ret <2 x double> %ret
 }
+
+; Test an f64 absolute that uses vector registers.
+define double @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: wflpdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = call double @llvm.fabs.f64(double %scalar)
+  ret double %ret
+}
+
+; Test an f64 negative absolute that uses vector registers.
+define double @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: wflndb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %abs = call double @llvm.fabs.f64(double %scalar)
+  %ret = fsub double -0.0, %abs
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-add-01.ll b/test/CodeGen/SystemZ/vec-add-01.ll
index 1de2aa2a1b9..31703437767 100644
--- a/test/CodeGen/SystemZ/vec-add-01.ll
+++ b/test/CodeGen/SystemZ/vec-add-01.ll
@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fadd <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 addition that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fadd double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-06.ll b/test/CodeGen/SystemZ/vec-cmp-06.ll
index bdb8744631a..eef57555b48 100644
--- a/test/CodeGen/SystemZ/vec-cmp-06.ll
+++ b/test/CodeGen/SystemZ/vec-cmp-06.ll
@@ -1,4 +1,4 @@
-; Test v2f64 comparisons.
+; Test f64 and v2f64 comparisons.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
@@ -335,3 +335,15 @@ define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
   %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
   ret <2 x double> %ret
 }
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
+; CHECK-LABEL: f29:
+; CHECK: wfcdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+  %f2 = extractelement <2 x double> %vec, i32 0
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/vec-conv-02.ll b/test/CodeGen/SystemZ/vec-conv-02.ll
index ceccfc60b37..ab84389f3c8 100644
--- a/test/CodeGen/SystemZ/vec-conv-02.ll
+++ b/test/CodeGen/SystemZ/vec-conv-02.ll
@@ -11,3 +11,23 @@ define void @f1(<2 x double> %val, <2 x float> *%ptr) {
   store <2 x float> %res, <2 x float> *%ptr
   ret void
 }
+
+; Test conversion of an f64 in a vector register to an f32.
+define float @f2(<2 x double> %vec) {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %vec, i32 0
+  %ret = fptrunc double %scalar to float
+  ret float %ret
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f3(<4 x float> %vec) {
+; CHECK-LABEL: f3:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %vec, i32 0
+  %ret = fpext float %scalar to double
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-div-01.ll b/test/CodeGen/SystemZ/vec-div-01.ll
index 5666444e9da..506d40861d3 100644
--- a/test/CodeGen/SystemZ/vec-div-01.ll
+++ b/test/CodeGen/SystemZ/vec-div-01.ll
@@ -70,3 +70,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fdiv <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fdiv double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-mul-01.ll b/test/CodeGen/SystemZ/vec-mul-01.ll
index d0018fa1f8c..5ecc30d4427 100644
--- a/test/CodeGen/SystemZ/vec-mul-01.ll
+++ b/test/CodeGen/SystemZ/vec-mul-01.ll
@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fmul <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fmul double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-neg-01.ll b/test/CodeGen/SystemZ/vec-neg-01.ll
index 491e24bb34f..b1389ce4d6d 100644
--- a/test/CodeGen/SystemZ/vec-neg-01.ll
+++ b/test/CodeGen/SystemZ/vec-neg-01.ll
@@ -46,3 +46,13 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
   %ret = fsub <2 x double> <double -0.0, double -0.0>, %val
   ret <2 x double> %ret
 }
+
+; Test an f64 negation that uses vector registers.
+define double @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: wflcdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = fsub double -0.0, %scalar
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-round-01.ll b/test/CodeGen/SystemZ/vec-round-01.ll
index 284b83e96f7..82718276bb0 100644
--- a/test/CodeGen/SystemZ/vec-round-01.ll
+++ b/test/CodeGen/SystemZ/vec-round-01.ll
@@ -2,6 +2,12 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.rint.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
@@ -56,3 +62,57 @@ define <2 x double> @f6(<2 x double> %val) {
   %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
   ret <2 x double> %res
 }
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.rint.f64(double %scalar)
+  ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.nearbyint.f64(double %scalar)
+  ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.floor.f64(double %scalar)
+  ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.ceil.f64(double %scalar)
+  ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.trunc.f64(double %scalar)
+  ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.round.f64(double %scalar)
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/vec-sqrt-01.ll b/test/CodeGen/SystemZ/vec-sqrt-01.ll
index 0160c24a749..5c3ffb3b064 100644
--- a/test/CodeGen/SystemZ/vec-sqrt-01.ll
+++ b/test/CodeGen/SystemZ/vec-sqrt-01.ll
@@ -1,7 +1,8 @@
-; Test v2f64 square root.
+; Test f64 and v2f64 square root.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.sqrt.f64(double)
 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
 
 define <2 x double> @f1(<2 x double> %val) {
@@ -11,3 +12,12 @@
   %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
   ret <2 x double> %ret
 }
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = call double @llvm.sqrt.f64(double %scalar)
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-sub-01.ll b/test/CodeGen/SystemZ/vec-sub-01.ll
index aabf1c9be4a..5620ebcb8c4 100644
--- a/test/CodeGen/SystemZ/vec-sub-01.ll
+++ b/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -74,3 +74,14 @@ define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
   %ret = fsub <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fsub double %scalar1, %scalar2
+  ret double %ret
+}