[SystemZ] Add CodeGen support for scalar f64 ops in vector registers

The z13 vector facility includes some instructions that operate only on the
high f64 in a v2f64, effectively extending the FP register set from 16
to 32 registers.  It's still better to use the old instructions if the
operands happen to fit though, since the older instructions have a shorter
encoding.

Based on a patch by Richard Sandiford.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236524 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Ulrich Weigand
2015-05-05 19:28:34 +00:00
parent 878c6281d3
commit cf0fa9b9dd
40 changed files with 1102 additions and 80 deletions

View File

@@ -40,8 +40,8 @@ include "SystemZOperands.td"
include "SystemZPatterns.td"
include "SystemZInstrFormats.td"
include "SystemZInstrInfo.td"
include "SystemZInstrFP.td"
include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
def SystemZInstrInfo : InstrInfo {}

View File

@@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
Context);
}
// Lower MI, a load of the high part of a vector register from memory, into
// an MCInst that performs the same load with the replicating vector load
// Opcode.  The destination (operand 0) is rewritten to its VR128 form; the
// register, immediate and register in operands 1-3 are passed through as-is.
static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
  const unsigned DestVR = SystemZMC::getRegAsVR128(MI->getOperand(0).getReg());
  const unsigned Reg1 = MI->getOperand(1).getReg();
  const int64_t Imm2 = MI->getOperand(2).getImm();
  const unsigned Reg3 = MI->getOperand(3).getReg();
  return MCInstBuilder(Opcode)
      .addReg(DestVR)
      .addReg(Reg1)
      .addImm(Imm2)
      .addReg(Reg3);
}
// Lower MI, a store of the high part of a vector register to memory, into
// an MCInst that performs the same store with the elemental vector store
// Opcode.  The stored register (operand 0) is rewritten to its VR128 form,
// operands 1-3 are passed through as-is, and a final immediate of 0 is
// appended (presumably the element index of the high element).
static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
  const unsigned SrcVR = SystemZMC::getRegAsVR128(MI->getOperand(0).getReg());
  const unsigned Reg1 = MI->getOperand(1).getReg();
  const int64_t Imm2 = MI->getOperand(2).getImm();
  const unsigned Reg3 = MI->getOperand(3).getReg();
  return MCInstBuilder(Opcode)
      .addReg(SrcVR)
      .addReg(Reg1)
      .addImm(Imm2)
      .addReg(Reg3)
      .addImm(0);
}
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
@@ -158,6 +179,29 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
break;
case SystemZ::VLR32:
case SystemZ::VLR64:
LoweredMI = MCInstBuilder(SystemZ::VLR)
.addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
.addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
break;
case SystemZ::VL32:
LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
break;
case SystemZ::VL64:
LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
break;
case SystemZ::VST32:
LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
break;
case SystemZ::VST64:
LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
break;
case SystemZ::LFER:
LoweredMI = MCInstBuilder(SystemZ::VLGVF)
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))

View File

@@ -91,9 +91,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
if (Subtarget.hasVector()) {
addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
} else {
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
}
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {

View File

@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
defm : CompareZeroFP<LTEBRCompare, FP32>;
defm : CompareZeroFP<LTDBRCompare, FP64>;
defm : CompareZeroFP<LTXBRCompare, FP128>;
// Note that the comparison against zero operation is not available if we
// have vector support, since load-and-test instructions will partially
// clobber the target (vector) register.
let Predicates = [FeatureNoVector] in {
defm : CompareZeroFP<LTEBRCompare, FP32>;
defm : CompareZeroFP<LTDBRCompare, FP64>;
defm : CompareZeroFP<LTXBRCompare, FP128>;
}
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
// For z13 we prefer LDE over LE to avoid partial register dependencies.
def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {

View File

@@ -2151,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
// A floating-point load-and-test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
// Note that the comparison against zero operation is not available if we
// have vector support, since load-and-test instructions will partially
// clobber the target (vector) register.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
RegisterOperand cls> {
def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
let isCodeGenOnly = 1 in
let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
}
@@ -2401,6 +2404,23 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern>
class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
// An alias of a UnaryVRR*, but with different register sizes.
// tr1 describes the result register ($V1) and tr2 the source register ($V2).
// The selection pattern applies "operator" to $V2 (typed as tr2.vt) and
// assigns the tr1.vt result to $V1.  The leading 6 is the "size" argument
// of the Alias base class.
class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
: Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
[(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
// An alias of a UnaryVRX, but with different register sizes.
// tr describes the result register ($V1); "mode" is the addressing mode of
// the memory operand $XBD2 and defaults to bdxaddr12only.  The selection
// pattern applies "operator" to the address and assigns the tr.vt result
// to $V1.  The leading 6 is the "size" argument of the Alias base class.
class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
AddressingMode mode = bdxaddr12only>
: Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
[(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
// An alias of a StoreVRX, but with different register sizes.
// tr describes the stored register ($V1); "mode" is the addressing mode of
// the memory operand $XBD2 and defaults to bdxaddr12only.  There are no
// outputs: the selection pattern applies "operator" to the tr.vt value in
// $V1 and the address.  The leading 6 is the "size" argument of the Alias
// base class.
class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
AddressingMode mode = bdxaddr12only>
: Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
[(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>

View File

@@ -578,6 +578,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR32;
else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR64;
else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR;
else
@@ -1118,6 +1122,12 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
} else if (RC == &SystemZ::VR32BitRegClass) {
LoadOpcode = SystemZ::VL32;
StoreOpcode = SystemZ::VST32;
} else if (RC == &SystemZ::VR64BitRegClass) {
LoadOpcode = SystemZ::VL64;
StoreOpcode = SystemZ::VST64;
} else if (RC == &SystemZ::VF128BitRegClass ||
RC == &SystemZ::VR128BitRegClass) {
LoadOpcode = SystemZ::VL;

View File

@@ -14,6 +14,8 @@
let Predicates = [FeatureVector] in {
// Register move.
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
// Load GR from VR element.
def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
@@ -123,6 +125,13 @@ let Predicates = [FeatureVector] in {
def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
(VLREPG bdxaddr12only:$addr)>;
// Use VLREP to load subvectors. These patterns use "12pair" because
// LEY and LDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
// Load logical element and zero.
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
@@ -193,6 +202,13 @@ let Predicates = [FeatureVector] in {
imm32zx1:$index),
(VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
// Use VSTE to store subvectors. These patterns use "12pair" because
// STEY and STDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
// Scatter element.
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
@@ -778,7 +794,7 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
let Predicates = [FeatureVector] in {
// Add.
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
// Convert from fixed 64-bit.
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
@@ -804,53 +820,55 @@ let Predicates = [FeatureVector] in {
// Divide.
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
// Load FP integer.
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
// Load lengthened.
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, null_frag, v64db, v32eb, 2, 8>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
// Load rounded.
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
// Multiply.
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
// Multiply and add.
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
// Multiply and subtract.
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
// Load complement.
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
// Load negative.
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
// Load positive.
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
// Square root.
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
// Subtract.
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
// Test data class immediate.
let Defs = [CC] in {
@@ -866,7 +884,7 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Compare scalar.
let Defs = [CC] in
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, null_frag, v64db, 3>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
// Compare and signal scalar.
let Defs = [CC] in

View File

@@ -15,6 +15,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@@ -36,6 +37,10 @@ public:
private:
bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
unsigned LLIxL, unsigned LLIxH);
bool shortenOn0(MachineInstr &MI, unsigned Opcode);
bool shortenOn01(MachineInstr &MI, unsigned Opcode);
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
return false;
}
// If register operand 0 of MI has a 4-bit encoding (its first register
// number is below 16), switch MI to Opcode and return true.  Otherwise
// leave MI untouched and return false.
bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
  const unsigned Reg0 = MI.getOperand(0).getReg();
  if (SystemZMC::getFirstReg(Reg0) >= 16)
    return false;

  MI.setDesc(TII->get(Opcode));
  return true;
}
// If register operands 0 and 1 of MI both have a 4-bit encoding (first
// register number below 16), switch MI to Opcode and return true.
// Otherwise leave MI untouched and return false.
bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
  // Operand 1 is only inspected once operand 0 has been accepted.
  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) >= 16)
    return false;
  if (SystemZMC::getFirstReg(MI.getOperand(1).getReg()) >= 16)
    return false;

  MI.setDesc(TII->get(Opcode));
  return true;
}
// Switch MI to Opcode and return true when all of the following hold:
//   - register operand 0 has a 4-bit encoding,
//   - operand 1 names the same register as operand 0 (i.e. they are tied),
//   - register operand 2 has a 4-bit encoding.
// Otherwise leave MI untouched and return false.
bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) >= 16)
    return false;
  if (MI.getOperand(1).getReg() != MI.getOperand(0).getReg())
    return false;
  if (SystemZMC::getFirstReg(MI.getOperand(2).getReg()) >= 16)
    return false;

  MI.setDesc(TII->get(Opcode));
  return true;
}
// MI is a vector-style conversion instruction whose operands are ordered:
//   destination, source, exact-suppress, rounding-mode.
// If both registers have a 4-bit encoding, rewrite MI in place to Opcode,
// whose operands are ordered:
//   destination, rounding-mode, source, exact-suppress.
// Returns true if MI was rewritten and false if it was left untouched.
bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) >= 16)
    return false;
  if (SystemZMC::getFirstReg(MI.getOperand(1).getReg()) >= 16)
    return false;

  // Copy all four operands before detaching them from MI.
  const MachineOperand DestOp(MI.getOperand(0));
  const MachineOperand SrcOp(MI.getOperand(1));
  const MachineOperand SuppressOp(MI.getOperand(2));
  const MachineOperand ModeOp(MI.getOperand(3));

  // Remove from the last operand down to the first so that the indices of
  // the operands still to be removed are not disturbed.
  for (unsigned Idx = 4; Idx-- > 0;)
    MI.RemoveOperand(Idx);

  // Install the new descriptor, then re-add the operands in its order.
  MI.setDesc(TII->get(Opcode));
  MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
      .addOperand(DestOp)
      .addOperand(ModeOp)
      .addOperand(SrcOp)
      .addOperand(SuppressOp);
  return true;
}
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
if (Opcode == SystemZ::IILF)
switch (MI.getOpcode()) {
case SystemZ::IILF:
Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
SystemZ::LLILH);
else if (Opcode == SystemZ::IIHF)
break;
case SystemZ::IIHF:
Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
SystemZ::LLIHH);
break;
case SystemZ::WFADB:
Changed |= shortenOn001(MI, SystemZ::ADBR);
break;
case SystemZ::WFDDB:
Changed |= shortenOn001(MI, SystemZ::DDBR);
break;
case SystemZ::WFIDB:
Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
break;
case SystemZ::WLDEB:
Changed |= shortenOn01(MI, SystemZ::LDEBR);
break;
case SystemZ::WLEDB:
Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
break;
case SystemZ::WFMDB:
Changed |= shortenOn001(MI, SystemZ::MDBR);
break;
case SystemZ::WFLCDB:
Changed |= shortenOn01(MI, SystemZ::LCDBR);
break;
case SystemZ::WFLNDB:
Changed |= shortenOn01(MI, SystemZ::LNDBR);
break;
case SystemZ::WFLPDB:
Changed |= shortenOn01(MI, SystemZ::LPDBR);
break;
case SystemZ::WFSQDB:
Changed |= shortenOn01(MI, SystemZ::SQDBR);
break;
case SystemZ::WFSDB:
Changed |= shortenOn001(MI, SystemZ::SDBR);
break;
case SystemZ::WFCDB:
Changed |= shortenOn01(MI, SystemZ::CDBR);
break;
case SystemZ::VL32:
// For z13 we prefer LDE over LE to avoid partial register dependencies.
Changed |= shortenOn0(MI, SystemZ::LDE32);
break;
case SystemZ::VST32:
Changed |= shortenOn0(MI, SystemZ::STE);
break;
case SystemZ::VL64:
Changed |= shortenOn0(MI, SystemZ::LD);
break;
case SystemZ::VST64:
Changed |= shortenOn0(MI, SystemZ::STD);
break;
}
unsigned UsedLow = 0;
unsigned UsedHigh = 0;
for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();

View File

@@ -1,6 +1,7 @@
; Test floating-point absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)

View File

@@ -1,6 +1,7 @@
; Test negated floating-point absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)

View File

@@ -1,7 +1,8 @@
; Test 64-bit floating-point addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
; Check register addition.
@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: adb %f0, 160(%r15)
; CHECK-SCALAR: adb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4

View File

@@ -1,7 +1,10 @@
; Test 64-bit floating-point comparison. The tests assume a z10 implementation
; of select, using conditional branches rather than LOCGR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @foo()
@@ -9,8 +12,9 @@ declare double @foo()
define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: cdbr %f0, %f2
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: cdb %f0, 0(%r4)
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
@@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: cdb %f0, 4088(%r4)
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double , double *%ptr
@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r4, 4096
; CHECK: cdb %f0, 0(%r4)
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double , double *%ptr
@@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r4, -8
; CHECK: cdb %f0, 0(%r4)
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double , double *%ptr
@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r5, 3
; CHECK: cdb %f0, 800(%r1,%r4)
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) {
; Check comparison with zero.
define i64 @f8(i64 %a, i64 %b, double %f) {
; CHECK-LABEL: f8:
; CHECK: ltdbr %f0, %f0
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK-SCALAR: ltdbr %f0, %f0
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR: lzdr %f1
; CHECK-VECTOR-NEXT: cdbr %f0, %f1
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f, 0.0
%res = select i1 %cond, i64 %a, i64 %b
@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
; CHECK-LABEL: f9:
; CHECK: cdb %f0, 0(%r4)
; CHECK-NEXT: jl {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK-SCALAR-NEXT: jl
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
; CHECK: br %r14
%f1 = load double , double *%ptr
%cond = fcmp ogt double %f1, %f2

View File

@@ -1,11 +1,15 @@
; Test floating-point truncations.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test f64->f32.
define float @f1(double %d1, double %d2) {
; CHECK-LABEL: f1:
; CHECK: ledbr %f0, %f2
; CHECK-SCALAR: ledbr %f0, %f2
; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
; CHECK: br %r14
%res = fptrunc double %d2 to float
ret float %res
@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) {
define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
; CHECK-LABEL: f5:
; CHECK: ldxbr %f1, %f1
; CHECK: adbr %f1, %f2
; CHECK: std %f1, 0(%r2)
; CHECK-SCALAR: adbr %f1, %f2
; CHECK-SCALAR: std %f1, 0(%r2)
; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
; CHECK-VECTOR: std [[REG]], 0(%r2)
; CHECK: br %r14
%val = load fp128 , fp128 *%ptr
%conv = fptrunc fp128 %val to double

View File

@@ -1,6 +1,8 @@
; Test extensions of f32 to f64.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Check register extension.
define double @f1(float %val) {
@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %index) {
; to use LDEB if possible.
define void @f7(double *%ptr1, float *%ptr2) {
; CHECK-LABEL: f7:
; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
%val0 = load volatile float , float *%ptr2
%val1 = load volatile float , float *%ptr2

View File

@@ -1,6 +1,8 @@
; Test 64-bit floating-point division.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: ddb %f0, 160(%r15)
; CHECK-SCALAR: ddb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4

View File

@@ -1,11 +1,13 @@
; Test moves between FPRs.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32 moves.
define float @f1(float %a, float %b) {
; CHECK-LABEL: f1:
; CHECK: ler %f0, %f2
; CHECK: br %r14
ret float %b
}
@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) {
define double @f2(double %a, double %b) {
; CHECK-LABEL: f2:
; CHECK: ldr %f0, %f2
; CHECK: br %r14
ret double %b
}
@@ -22,6 +25,7 @@ define void @f3(fp128 *%x) {
; CHECK-LABEL: f3:
; CHECK: lxr
; CHECK: axbr
; CHECK: br %r14
%val = load volatile fp128 , fp128 *%x
%sum = fadd fp128 %val, %val
store volatile fp128 %sum, fp128 *%x

View File

@@ -1,6 +1,7 @@
; Test 64-bit floating-point loads.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the LD range.
define double @f1(double *%src) {

View File

@@ -1,6 +1,7 @@
; Test 64-bit floating-point stores.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the STD range.
define void @f1(double *%src, double %val) {

View File

@@ -0,0 +1,110 @@
; Test 32-bit floating-point loads for z13.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test that we use LDE instead of LE - low end of the LE range.
define float @f1(float *%src) {
; CHECK-LABEL: f1:
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%val = load float, float *%src
ret float %val
}
; Test that we use LDE instead of LE - high end of the LE range.
define float @f2(float *%src) {
; CHECK-LABEL: f2:
; CHECK: lde %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 1023
%val = load float, float *%ptr
ret float %val
}
; Check the next word up, which should use LEY instead of LDE.
define float @f3(float *%src) {
; CHECK-LABEL: f3:
; CHECK: ley %f0, 4096(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 1024
%val = load float, float *%ptr
ret float %val
}
; Check the high end of the aligned LEY range.
define float @f4(float *%src) {
; CHECK-LABEL: f4:
; CHECK: ley %f0, 524284(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 131071
%val = load float, float *%ptr
ret float %val
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f5(float *%src) {
; CHECK-LABEL: f5:
; CHECK: agfi %r2, 524288
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 131072
%val = load float, float *%ptr
ret float %val
}
; Check the high end of the negative aligned LEY range.
define float @f6(float *%src) {
; CHECK-LABEL: f6:
; CHECK: ley %f0, -4(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -1
%val = load float, float *%ptr
ret float %val
}
; Check the low end of the LEY range.
define float @f7(float *%src) {
; CHECK-LABEL: f7:
; CHECK: ley %f0, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -131072
%val = load float, float *%ptr
ret float %val
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f8(float *%src) {
; CHECK-LABEL: f8:
; CHECK: agfi %r2, -524292
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -131073
%val = load float, float *%ptr
ret float %val
}
; Check that LDE allows an index.
define float @f9(i64 %src, i64 %index) {
; CHECK-LABEL: f9:
; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to float *
%val = load float, float *%ptr
ret float %val
}
; Check that LEY allows an index.
define float @f10(i64 %src, i64 %index) {
; CHECK-LABEL: f10:
; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to float *
%val = load float, float *%ptr
ret float %val
}

View File

@@ -1,6 +1,8 @@
; Test multiplication of two f64s, producing an f64 result.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: mdb %f0, 160(%r15)
; CHECK-SCALAR: mdb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4

View File

@@ -1,11 +1,15 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
; CHECK: madbr %f4, %f0, %f2
; CHECK: ldr %f0, %f4
; CHECK-SCALAR: madbr %f4, %f0, %f2
; CHECK-SCALAR: ldr %f0, %f4
; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res

View File

@@ -1,11 +1,15 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
; CHECK: msdbr %f4, %f0, %f2
; CHECK: ldr %f0, %f4
; CHECK-SCALAR: msdbr %f4, %f0, %f2
; CHECK-SCALAR: ldr %f0, %f4
; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)

View File

@@ -1,6 +1,7 @@
; Test floating-point negation.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
define float @f1(float %f) {

View File

@@ -1,6 +1,9 @@
; Test rounding functions for z196 and above.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test rint for f32.
declare float @llvm.rint.f32(float %f)
@@ -16,7 +19,8 @@ define float @f1(float %f) {
declare double @llvm.rint.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
; CHECK: fidbr %f0, 0, %f0
; CHECK-SCALAR: fidbr %f0, 0, %f0
; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call double @llvm.rint.f64(double %f)
ret double %res

View File

@@ -1,6 +1,8 @@
; Test 64-bit square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.sqrt.f64(double %f)
declare double @sqrt(double)
@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %index) {
; to use SQDB if possible.
define void @f7(double *%ptr) {
; CHECK-LABEL: f7:
; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
%val0 = load volatile double , double *%ptr
%val1 = load volatile double , double *%ptr

View File

@@ -1,6 +1,8 @@
; Test 64-bit floating-point subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: sdb %f0, 16{{[04]}}(%r15)
; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4

View File

@@ -2,7 +2,7 @@
; uses a different register class, but the set of saved and restored
; registers should be the same.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; This function should require all FPRs, but no other spill slots.
; We need to save and restore 8 of the 16 FPRs, so the frame size

View File

@@ -1,7 +1,7 @@
; Test the saving and restoring of FPRs in large frames.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
; Test a frame size that requires some FPRs to be saved and loaded using
; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.

View File

@@ -1,6 +1,6 @@
; Test spilling of FPRs.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; We need to save and restore 8 of the 16 FPRs and allocate an additional
; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly

View File

@@ -0,0 +1,445 @@
; Like frame-03.ll, but for z13. In this case we have 16 more registers
; available.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; This function should require all FPRs, but no other spill slots.
; We need to save and restore 8 of the 16 FPRs, so the frame size
; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160
; (the caller-allocated part of the frame) + 224.
;
; The function keeps 32 doubles live at once. Besides the FPR loads, the
; checks sample vlrepg loads into %v16, %v23, %v24 and %v31; the expected
; frame size leaves room only for the eight %f8-%f15 save slots.
define void @f1(double *%ptr) {
; CHECK-LABEL: f1:
; CHECK: aghi %r15, -224
; CHECK: .cfi_def_cfa_offset 384
; CHECK: std %f8, 216(%r15)
; CHECK: std %f9, 208(%r15)
; CHECK: std %f10, 200(%r15)
; CHECK: std %f11, 192(%r15)
; CHECK: std %f12, 184(%r15)
; CHECK: std %f13, 176(%r15)
; CHECK: std %f14, 168(%r15)
; CHECK: std %f15, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK: .cfi_offset %f9, -176
; CHECK: .cfi_offset %f10, -184
; CHECK: .cfi_offset %f11, -192
; CHECK: .cfi_offset %f12, -200
; CHECK: .cfi_offset %f13, -208
; CHECK: .cfi_offset %f14, -216
; CHECK: .cfi_offset %f15, -224
; CHECK-DAG: ld %f0, 0(%r2)
; CHECK-DAG: ld %f7, 0(%r2)
; CHECK-DAG: ld %f8, 0(%r2)
; CHECK-DAG: ld %f15, 0(%r2)
; CHECK-DAG: vlrepg %v16, 0(%r2)
; CHECK-DAG: vlrepg %v23, 0(%r2)
; CHECK-DAG: vlrepg %v24, 0(%r2)
; CHECK-DAG: vlrepg %v31, 0(%r2)
; CHECK: ld %f8, 216(%r15)
; CHECK: ld %f9, 208(%r15)
; CHECK: ld %f10, 200(%r15)
; CHECK: ld %f11, 192(%r15)
; CHECK: ld %f12, 184(%r15)
; CHECK: ld %f13, 176(%r15)
; CHECK: ld %f14, 168(%r15)
; CHECK: ld %f15, 160(%r15)
; CHECK: aghi %r15, 224
; CHECK: br %r14
; Load all 32 values before any of them is used, so that they are live
; at the same time.
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l9 = load volatile double, double *%ptr
%l10 = load volatile double, double *%ptr
%l11 = load volatile double, double *%ptr
%l12 = load volatile double, double *%ptr
%l13 = load volatile double, double *%ptr
%l14 = load volatile double, double *%ptr
%l15 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
; Chain every loaded value through an fsub; each result feeds the next
; subtraction and is also stored below.
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc9 = fsub double %l9, %acc8
%acc10 = fsub double %l10, %acc9
%acc11 = fsub double %l11, %acc10
%acc12 = fsub double %l12, %acc11
%acc13 = fsub double %l13, %acc12
%acc14 = fsub double %l14, %acc13
%acc15 = fsub double %l15, %acc14
%acc16 = fsub double %l16, %acc15
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
; Store all 32 results only after the whole chain has been computed, so
; that every result stays live until this point.
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc9, double *%ptr
store volatile double %acc10, double *%ptr
store volatile double %acc11, double *%ptr
store volatile double %acc12, double *%ptr
store volatile double %acc13, double *%ptr
store volatile double %acc14, double *%ptr
store volatile double %acc15, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; Like f1, but requires one fewer FPR. We allocate in numerical order,
; so %f15 is the one that gets dropped.
;
; Only 31 values are kept live here (there is no %l15 / %acc15), so the
; expected frame is 160 + 7 * 8 = 216 bytes and the CFA offset is
; 160 + 216 = 376.
define void @f2(double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: aghi %r15, -216
; CHECK: .cfi_def_cfa_offset 376
; CHECK: std %f8, 208(%r15)
; CHECK: std %f9, 200(%r15)
; CHECK: std %f10, 192(%r15)
; CHECK: std %f11, 184(%r15)
; CHECK: std %f12, 176(%r15)
; CHECK: std %f13, 168(%r15)
; CHECK: std %f14, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK: .cfi_offset %f9, -176
; CHECK: .cfi_offset %f10, -184
; CHECK: .cfi_offset %f11, -192
; CHECK: .cfi_offset %f12, -200
; CHECK: .cfi_offset %f13, -208
; CHECK: .cfi_offset %f14, -216
; CHECK-NOT: %v15
; CHECK-NOT: %f15
; CHECK: ld %f8, 208(%r15)
; CHECK: ld %f9, 200(%r15)
; CHECK: ld %f10, 192(%r15)
; CHECK: ld %f11, 184(%r15)
; CHECK: ld %f12, 176(%r15)
; CHECK: ld %f13, 168(%r15)
; CHECK: ld %f14, 160(%r15)
; CHECK: aghi %r15, 216
; CHECK: br %r14
; Load all 31 values before any of them is used (number 15 is omitted).
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l9 = load volatile double, double *%ptr
%l10 = load volatile double, double *%ptr
%l11 = load volatile double, double *%ptr
%l12 = load volatile double, double *%ptr
%l13 = load volatile double, double *%ptr
%l14 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
; Chain every loaded value through an fsub; %acc16 continues directly
; from %acc14 because number 15 is omitted.
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc9 = fsub double %l9, %acc8
%acc10 = fsub double %l10, %acc9
%acc11 = fsub double %l11, %acc10
%acc12 = fsub double %l12, %acc11
%acc13 = fsub double %l13, %acc12
%acc14 = fsub double %l14, %acc13
%acc16 = fsub double %l16, %acc14
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
; Store all 31 results only after the whole chain has been computed.
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc9, double *%ptr
store volatile double %acc10, double *%ptr
store volatile double %acc11, double *%ptr
store volatile double %acc12, double *%ptr
store volatile double %acc13, double *%ptr
store volatile double %acc14, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; Like f1, but should require only one call-saved FPR.
;
; 25 values are kept live here (numbers 0-8 and 16-31), so the expected
; frame is 160 + 8 = 168 bytes with a single save slot for %f8, and the
; CFA offset is 160 + 168 = 328.
define void @f3(double *%ptr) {
; CHECK-LABEL: f3:
; CHECK: aghi %r15, -168
; CHECK: .cfi_def_cfa_offset 328
; CHECK: std %f8, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK-NOT: {{%[fv]9}}
; CHECK-NOT: {{%[fv]1[0-5]}}
; CHECK: ld %f8, 160(%r15)
; CHECK: aghi %r15, 168
; CHECK: br %r14
; Load all 25 values before any of them is used (numbers 9-15 are omitted).
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
; Chain every loaded value through an fsub; %acc16 continues directly
; from %acc8 because numbers 9-15 are omitted.
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc16 = fsub double %l16, %acc8
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
; Store all 25 results only after the whole chain has been computed.
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; This function should use all call-clobbered FPRs and vector registers
; but no call-saved ones. It shouldn't need to create a frame.
;
; 24 values are kept live here (numbers 0-7 and 16-31). The checks require
; that neither %r15 nor any register numbered 8-15 appears before the
; return.
define void @f4(double *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r15
; CHECK-NOT: {{%[fv][89]}}
; CHECK-NOT: {{%[fv]1[0-5]}}
; CHECK: br %r14
; Load all 24 values before any of them is used (numbers 8-15 are omitted).
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
; Chain every loaded value through an fsub; %acc16 continues directly
; from %acc7 because numbers 8-15 are omitted.
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc16 = fsub double %l16, %acc7
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
; Store all 24 results only after the whole chain has been computed.
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}

View File

@@ -1,7 +1,8 @@
; Test v2f64 absolute.
; Test f64 and v2f64 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
; Test a plain absolute.
@@ -22,3 +23,24 @@ define <2 x double> @f2(<2 x double> %val) {
%ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
ret <2 x double> %ret
}
; Test an f64 absolute that uses vector registers.
; The scalar is element 0 of the vector argument; a single wflpdb is
; expected to read it from %v24 and write the result to %f0.
define double @f3(<2 x double> %val) {
; CHECK-LABEL: f3:
; CHECK: wflpdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = call double @llvm.fabs.f64(double %scalar)
ret double %ret
}
; Test an f64 negative absolute that uses vector registers.
; The fabs of element 0 followed by a subtraction from -0.0 is expected
; to become a single wflndb from %v24 to %f0.
define double @f4(<2 x double> %val) {
; CHECK-LABEL: f4:
; CHECK: wflndb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%abs = call double @llvm.fabs.f64(double %scalar)
%ret = fsub double -0.0, %abs
ret double %ret
}

View File

@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fadd <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 addition that uses vector registers.
; Both operands are element 0 of a vector argument; a single wfadb is
; expected to read them from %v24 and %v26 and write the sum to %f0.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfadb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fadd double %scalar1, %scalar2
ret double %ret
}

View File

@@ -1,4 +1,4 @@
; Test v2f64 comparisons.
; Test f64 and v2f64 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
@@ -335,3 +335,15 @@ define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
ret <2 x double> %ret
}
; Test an f64 comparison that uses vector registers.
; The scalar argument %f1 is compared for ordered equality with element 0
; of %vec; the expected wfcdb takes them from %f0 and %v24. The select on
; the result is expected to become a locgrne directly after the compare.
define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
; CHECK-LABEL: f29:
; CHECK: wfcdb %f0, %v24
; CHECK-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%f2 = extractelement <2 x double> %vec, i32 0
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}

View File

@@ -11,3 +11,23 @@ define void @f1(<2 x double> %val, <2 x float> *%ptr) {
store <2 x float> %res, <2 x float> *%ptr
ret void
}
; Test conversion of an f64 in a vector register to an f32.
; The fptrunc of element 0 is expected to become a single wledb from
; %v24 to %f0.
define float @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
; CHECK: wledb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %vec, i32 0
%ret = fptrunc double %scalar to float
ret float %ret
}
; Test conversion of an f32 in a vector register to an f64.
; The fpext of element 0 of a <4 x float> is expected to become a single
; wldeb from %v24 to %f0.
define double @f3(<4 x float> %vec) {
; CHECK-LABEL: f3:
; CHECK: wldeb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <4 x float> %vec, i32 0
%ret = fpext float %scalar to double
ret double %ret
}

View File

@@ -70,3 +70,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fdiv <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 division that uses vector registers.
; Both operands are element 0 of a vector argument; a single wfddb is
; expected to divide the value in %v24 by the value in %v26 into %f0.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfddb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fdiv double %scalar1, %scalar2
ret double %ret
}

View File

@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fmul <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 multiplication that uses vector registers.
; Both operands are element 0 of a vector argument; a single wfmdb is
; expected to read them from %v24 and %v26 and write the product to %f0.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfmdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fmul double %scalar1, %scalar2
ret double %ret
}

View File

@@ -46,3 +46,13 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
%ret = fsub <2 x double> <double -0.0, double -0.0>, %val
ret <2 x double> %ret
}
; Test an f64 negation that uses vector registers.
; The negation is written as a subtraction of element 0 from -0.0 and is
; expected to become a single wflcdb from %v24 to %f0.
define double @f6(<2 x double> %val) {
; CHECK-LABEL: f6:
; CHECK: wflcdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = fsub double -0.0, %scalar
ret double %ret
}

View File

@@ -2,6 +2,12 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.rint.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.ceil.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.round.f64(double)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
@@ -56,3 +62,57 @@ define <2 x double> @f6(<2 x double> %val) {
%res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
ret <2 x double> %res
}
; Test an f64 rint on element 0 of a vector argument. A single wfidb from
; %v24 to %f0 is expected, with 0, 0 as its last two operands.
; NOTE(review): presumably those operands are the inexact-suppression and
; rounding-mode fields of wfidb; confirm against the ISA reference.
define double @f7(<2 x double> %val) {
; CHECK-LABEL: f7:
; CHECK: wfidb %f0, %v24, 0, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.rint.f64(double %scalar)
ret double %res
}
; Test an f64 nearbyint on element 0 of a vector argument. A single wfidb
; from %v24 to %f0 is expected, with 4, 0 as its last two operands.
; NOTE(review): presumably the 4 suppresses the inexact exception and the
; 0 keeps the current rounding mode; confirm against the ISA reference.
define double @f8(<2 x double> %val) {
; CHECK-LABEL: f8:
; CHECK: wfidb %f0, %v24, 4, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.nearbyint.f64(double %scalar)
ret double %res
}
; Test an f64 floor on element 0 of a vector argument. A single wfidb
; from %v24 to %f0 is expected, with 4, 7 as its last two operands.
; NOTE(review): presumably the 7 selects rounding toward negative
; infinity; confirm against the ISA reference.
define double @f9(<2 x double> %val) {
; CHECK-LABEL: f9:
; CHECK: wfidb %f0, %v24, 4, 7
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.floor.f64(double %scalar)
ret double %res
}
; Test an f64 ceil on element 0 of a vector argument. A single wfidb
; from %v24 to %f0 is expected, with 4, 6 as its last two operands.
; NOTE(review): presumably the 6 selects rounding toward positive
; infinity; confirm against the ISA reference.
define double @f10(<2 x double> %val) {
; CHECK-LABEL: f10:
; CHECK: wfidb %f0, %v24, 4, 6
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.ceil.f64(double %scalar)
ret double %res
}
; Test an f64 trunc on element 0 of a vector argument. A single wfidb
; from %v24 to %f0 is expected, with 4, 5 as its last two operands.
; NOTE(review): presumably the 5 selects rounding toward zero; confirm
; against the ISA reference.
define double @f11(<2 x double> %val) {
; CHECK-LABEL: f11:
; CHECK: wfidb %f0, %v24, 4, 5
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.trunc.f64(double %scalar)
ret double %res
}
; Test an f64 round on element 0 of a vector argument. A single wfidb
; from %v24 to %f0 is expected, with 4, 1 as its last two operands.
; NOTE(review): presumably the 1 selects round-to-nearest with ties away
; from zero; confirm against the ISA reference.
define double @f12(<2 x double> %val) {
; CHECK-LABEL: f12:
; CHECK: wfidb %f0, %v24, 4, 1
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.round.f64(double %scalar)
ret double %res
}

View File

@@ -1,7 +1,8 @@
; Test v2f64 square root.
; Test f64 and v2f64 square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define <2 x double> @f1(<2 x double> %val) {
@@ -11,3 +12,12 @@ define <2 x double> @f1(<2 x double> %val) {
%ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
ret <2 x double> %ret
}
; Test an f64 square root that uses vector registers.
; The sqrt of element 0 is expected to become a single wfsqdb from %v24
; to %f0.
define double @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: wfsqdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = call double @llvm.sqrt.f64(double %scalar)
ret double %ret
}

View File

@@ -74,3 +74,14 @@ define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
%ret = fsub <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 subtraction that uses vector registers.
; Both operands are element 0 of a vector argument; a single wfsdb is
; expected to subtract the value in %v26 from the value in %v24 into %f0.
define double @f7(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f7:
; CHECK: wfsdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fsub double %scalar1, %scalar2
ret double %ret
}