[SystemZ] Add CodeGen support for scalar f64 ops in vector registers

The z13 vector facility includes some instructions that operate only on the
high f64 in a v2f64, effectively extending the FP register set from 16
to 32 registers.  It is still better to use the older instructions when the
operands happen to be allocated to the first 16 floating-point registers,
since those instructions have a shorter encoding.
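
As an illustration of that trade-off, here is a minimal sketch in the style
of the updated tests below (not itself one of the commit's test files).
With -mcpu=z13 the scalar fadd could be selected as the vector-facility
WFADB, but as long as both operands are allocated to %f0-%f15 the
SystemZShortenInst pass rewrites it to ADBR, whose 4-byte RRE encoding is
shorter than WFADB's 6-byte VRR encoding:

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
define double @f1(double %a, double %b) {
; CHECK-LABEL: f1:
; CHECK: adbr %f0, %f2
; CHECK: br %r14
  %res = fadd double %a, %b
  ret double %res
}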

Based on a patch by Richard Sandiford.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236524 91177308-0d34-0410-b5e6-96231b3b80d8
Ulrich Weigand
2015-05-05 19:28:34 +00:00
parent 878c6281d3
commit cf0fa9b9dd
40 changed files with 1102 additions and 80 deletions


@@ -40,8 +40,8 @@ include "SystemZOperands.td"
 include "SystemZPatterns.td"
 include "SystemZInstrFormats.td"
 include "SystemZInstrInfo.td"
-include "SystemZInstrFP.td"
 include "SystemZInstrVector.td"
+include "SystemZInstrFP.td"
 
 def SystemZInstrInfo : InstrInfo {}


@@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
                              Context);
 }
 
+// MI loads the high part of a vector from memory.  Return an instruction
+// that uses replicating vector load Opcode to do the same thing.
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+  return MCInstBuilder(Opcode)
+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(MI->getOperand(2).getImm())
+    .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory.  Return an instruction
+// that uses elemental vector store Opcode to do the same thing.
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+  return MCInstBuilder(Opcode)
+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(MI->getOperand(2).getImm())
+    .addReg(MI->getOperand(3).getReg())
+    .addImm(0);
+}
+
 void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCInst LoweredMI;
@@ -158,6 +179,29 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
     break;
 
+  case SystemZ::VLR32:
+  case SystemZ::VLR64:
+    LoweredMI = MCInstBuilder(SystemZ::VLR)
+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+    break;
+
+  case SystemZ::VL32:
+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+    break;
+
+  case SystemZ::VL64:
+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+    break;
+
+  case SystemZ::VST32:
+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+    break;
+
+  case SystemZ::VST64:
+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+    break;
+
   case SystemZ::LFER:
     LoweredMI = MCInstBuilder(SystemZ::VLGVF)
      .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
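
A hedged sketch of the net effect of these pseudos (register numbers
illustrative, taken from the new z13 frame test at the end of this commit):
a VL64 whose destination has a 4-bit encoding is shortened to a plain LD
before it ever reaches this code, while one whose destination only exists
as a vector register is emitted here as a replicating load:

  ld      %f0, 0(%r2)     # low destination: VL64 shortened to LD
  vlrepg  %v16, 0(%r2)    # high destination: VL64 emitted as VLREPG

The store side is symmetric: VST64 becomes either STD or a VSTEG of
element 0, the element that scalar f64 code lives in.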


@@ -92,8 +92,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
   else
     addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
   addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
-  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
-  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+  if (Subtarget.hasVector()) {
+    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+  } else {
+    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+  }
   addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
 
   if (Subtarget.hasVector()) {


@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
   defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
 }
-defm : CompareZeroFP<LTEBRCompare, FP32>;
-defm : CompareZeroFP<LTDBRCompare, FP64>;
-defm : CompareZeroFP<LTXBRCompare, FP128>;
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
+let Predicates = [FeatureNoVector] in {
+  defm : CompareZeroFP<LTEBRCompare, FP32>;
+  defm : CompareZeroFP<LTDBRCompare, FP64>;
+  defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
 
 // Moves between 64-bit integer and floating-point registers.
 def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;

@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
   defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
   defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
 
+  // For z13 we prefer LDE over LE to avoid partial register dependencies.
+  def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
   // These instructions are split after register allocation, so we don't
   // want a custom inserter.
   let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
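
A small sketch of the consequence for compare-with-zero, mirroring function
f8 of the updated comparison test later in this commit (only the function
name here is hypothetical): with vector support, zero is materialized with
LZDR and compared with CDBR instead of using the load-and-test:

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
define i64 @sel0(i64 %a, i64 %b, double %f) {
; CHECK-LABEL: sel0:
; CHECK: lzdr %f1
; CHECK-NEXT: cdbr %f0, %f1
; CHECK-NEXT: locgrne %r2, %r3
; CHECK: br %r14
  %cond = fcmp oeq double %f, 0.0
  %res = select i1 %cond, i64 %a, i64 %b
  ret i64 %res
}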


@@ -2151,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
 
 // A floating-point load-and test operation.  Create both a normal unary
 // operation and one that acts as a comparison against zero.
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
 multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
                           RegisterOperand cls> {
   def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
-  let isCodeGenOnly = 1 in
+  let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
     def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
 }

@@ -2401,6 +2404,23 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern>
 class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
  : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
 
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+  : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+          [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+                    AddressingMode mode = bdxaddr12only>
+  : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+          [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+                    AddressingMode mode = bdxaddr12only>
+  : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+          [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
 // An alias of a BinaryRI, but with different register sizes.
 class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
                     Immediate imm>


@@ -578,6 +578,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opcode = SystemZ::LDR;
   else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LXR;
+  else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+    Opcode = SystemZ::VLR32;
+  else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+    Opcode = SystemZ::VLR64;
   else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::VLR;
   else

@@ -1118,6 +1122,12 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
   } else if (RC == &SystemZ::FP128BitRegClass) {
     LoadOpcode = SystemZ::LX;
     StoreOpcode = SystemZ::STX;
+  } else if (RC == &SystemZ::VR32BitRegClass) {
+    LoadOpcode = SystemZ::VL32;
+    StoreOpcode = SystemZ::VST32;
+  } else if (RC == &SystemZ::VR64BitRegClass) {
+    LoadOpcode = SystemZ::VL64;
+    StoreOpcode = SystemZ::VST64;
   } else if (RC == &SystemZ::VF128BitRegClass ||
              RC == &SystemZ::VR128BitRegClass) {
     LoadOpcode = SystemZ::VL;


@@ -14,6 +14,8 @@
 let Predicates = [FeatureVector] in {
   // Register move.
   def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+  def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+  def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
 
   // Load GR from VR element.
   def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;

@@ -123,6 +125,13 @@ let Predicates = [FeatureVector] in {
   def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
             (VLREPG bdxaddr12only:$addr)>;
 
+  // Use VLREP to load subvectors.  These patterns use "12pair" because
+  // LEY and LDY offer full 20-bit displacement fields.  It's often better
+  // to use those instructions rather than force a 20-bit displacement
+  // into a GPR temporary.
+  def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+  def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
   // Load logical element and zero.
   def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
   def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;

@@ -193,6 +202,13 @@ let Predicates = [FeatureVector] in {
                          imm32zx1:$index),
             (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
 
+  // Use VSTE to store subvectors.  These patterns use "12pair" because
+  // STEY and STDY offer full 20-bit displacement fields.  It's often better
+  // to use those instructions rather than force a 20-bit displacement
+  // into a GPR temporary.
+  def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+  def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
   // Scatter element.
   def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
   def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;

@@ -778,7 +794,7 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
 let Predicates = [FeatureVector] in {
   // Add.
   def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
-  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
+  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
 
   // Convert from fixed 64-bit.
   def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;

@@ -804,53 +820,55 @@ let Predicates = [FeatureVector] in {
 
   // Divide.
   def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
-  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
+  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
 
   // Load FP integer.
   def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
   def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
   defm : VectorRounding<VFIDB, v128db>;
+  defm : VectorRounding<WFIDB, v64db>;
 
   // Load lengthened.
   def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
-  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, null_frag, v64db, v32eb, 2, 8>;
+  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
 
   // Load rounded,
   def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
   def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
   def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+  def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
 
   // Multiply.
   def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
-  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
+  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
 
   // Multiply and add.
   def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
-  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
+  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
 
   // Multiply and subtract.
   def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
-  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
+  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
 
   // Load complement,
   def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
-  def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
+  def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
 
   // Load negative.
   def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
-  def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
+  def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
 
   // Load positive.
   def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
-  def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
+  def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
 
   // Square root.
   def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
-  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
+  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
 
   // Subtract.
   def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
-  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
+  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
 
   // Test data class immediate.
   let Defs = [CC] in {

@@ -866,7 +884,7 @@ let Predicates = [FeatureVector] in {
 let Predicates = [FeatureVector] in {
   // Compare scalar.
   let Defs = [CC] in
-    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, null_frag, v64db, 3>;
+    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
 
   // Compare and signal scalar.
   let Defs = [CC] in
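
Since the scalar "W" forms now carry the same selection patterns as their
full-vector siblings, plain scalar IR can reach them directly.  A hedged
sketch mirroring f1 of the updated truncation test later in this commit:
fround selects WLEDB, and when both registers have 4-bit encodings
SystemZShortenInst turns it into LEDBRA with the operand order destination,
rounding mode, source, exact-suppress:

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
define float @f1(double %d1, double %d2) {
; CHECK-LABEL: f1:
; CHECK: ledbra %f0, 0, %f2, 0
; CHECK: br %r14
  %res = fptrunc double %d2 to float
  ret float %res
}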


@@ -15,6 +15,7 @@
 
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 
 using namespace llvm;

@@ -36,6 +37,10 @@ public:
 private:
   bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
                   unsigned LLIxL, unsigned LLIxH);
+  bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+  bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+  bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+  bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
 
   const SystemZInstrInfo *TII;
@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
   return false;
 }
 
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+      SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+    MI.setDesc(TII->get(Opcode));
+    return true;
+  }
+  return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode.  If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+    MachineOperand Dest(MI.getOperand(0));
+    MachineOperand Src(MI.getOperand(1));
+    MachineOperand Suppress(MI.getOperand(2));
+    MachineOperand Mode(MI.getOperand(3));
+    MI.RemoveOperand(3);
+    MI.RemoveOperand(2);
+    MI.RemoveOperand(1);
+    MI.RemoveOperand(0);
+    MI.setDesc(TII->get(Opcode));
+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+      .addOperand(Dest)
+      .addOperand(Mode)
+      .addOperand(Src)
+      .addOperand(Suppress);
+    return true;
+  }
+  return false;
+}
+
 // Process all instructions in MBB.  Return true if something changed.
 bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
   // Iterate backwards through the block looking for instructions to change.
   for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
     MachineInstr &MI = *MBBI;
-    unsigned Opcode = MI.getOpcode();
-    if (Opcode == SystemZ::IILF)
-      Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
-                            SystemZ::LLILH);
-    else if (Opcode == SystemZ::IIHF)
-      Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
-                            SystemZ::LLIHH);
+    switch (MI.getOpcode()) {
+    case SystemZ::IILF:
+      Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
+                            SystemZ::LLILH);
+      break;
+    case SystemZ::IIHF:
+      Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
+                            SystemZ::LLIHH);
+      break;
+    case SystemZ::WFADB:
+      Changed |= shortenOn001(MI, SystemZ::ADBR);
+      break;
+    case SystemZ::WFDDB:
+      Changed |= shortenOn001(MI, SystemZ::DDBR);
+      break;
+    case SystemZ::WFIDB:
+      Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
+      break;
+    case SystemZ::WLDEB:
+      Changed |= shortenOn01(MI, SystemZ::LDEBR);
+      break;
+    case SystemZ::WLEDB:
+      Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
+      break;
+    case SystemZ::WFMDB:
+      Changed |= shortenOn001(MI, SystemZ::MDBR);
+      break;
+    case SystemZ::WFLCDB:
+      Changed |= shortenOn01(MI, SystemZ::LCDBR);
+      break;
+    case SystemZ::WFLNDB:
+      Changed |= shortenOn01(MI, SystemZ::LNDBR);
+      break;
+    case SystemZ::WFLPDB:
+      Changed |= shortenOn01(MI, SystemZ::LPDBR);
+      break;
+    case SystemZ::WFSQDB:
+      Changed |= shortenOn01(MI, SystemZ::SQDBR);
+      break;
+    case SystemZ::WFSDB:
+      Changed |= shortenOn001(MI, SystemZ::SDBR);
+      break;
+    case SystemZ::WFCDB:
+      Changed |= shortenOn01(MI, SystemZ::CDBR);
+      break;
+    case SystemZ::VL32:
+      // For z13 we prefer LDE over LE to avoid partial register dependencies.
+      Changed |= shortenOn0(MI, SystemZ::LDE32);
+      break;
+    case SystemZ::VST32:
+      Changed |= shortenOn0(MI, SystemZ::STE);
+      break;
+    case SystemZ::VL64:
+      Changed |= shortenOn0(MI, SystemZ::LD);
+      break;
+    case SystemZ::VST64:
+      Changed |= shortenOn0(MI, SystemZ::STD);
+      break;
+    }
 
     unsigned UsedLow = 0;
     unsigned UsedHigh = 0;
     for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
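
To see the rewrite in action, a minimal sketch mirroring f2 of the updated
rounding test later in this commit: rint selects WFIDB on z13, and since
%f0 has a 4-bit encoding shortenFPConv swaps the operands into FIDBRA form:

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.rint.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
; CHECK: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
  %res = call double @llvm.rint.f64(double %f)
  ret double %res
}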


@@ -1,6 +1,7 @@
 ; Test floating-point absolute.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)


@@ -1,6 +1,7 @@
 ; Test negated floating-point absolute.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)


@@ -1,7 +1,8 @@
 ; Test 64-bit floating-point addition.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()
 
 ; Check register addition.

@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: adb %f0, 160(%r15)
+; CHECK-SCALAR: adb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4


@@ -1,7 +1,10 @@
 ; Test 64-bit floating-point comparison.  The tests assume a z10 implementation
 ; of select, using conditional branches rather than LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @foo()

@@ -9,8 +12,9 @@ declare double @foo()
 define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
 ; CHECK-LABEL: f1:
 ; CHECK: cdbr %f0, %f2
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b

@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
 define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
 ; CHECK-LABEL: f2:
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2

@@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
 define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f3:
 ; CHECK: cdb %f0, 4088(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
   %f2 = load double , double *%ptr

@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f4:
 ; CHECK: aghi %r4, 4096
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
   %f2 = load double , double *%ptr

@@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f5:
 ; CHECK: aghi %r4, -8
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
   %f2 = load double , double *%ptr

@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
 ; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r5, 3
 ; CHECK: cdb %f0, 800(%r1,%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100

@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4

@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) {
 ; Check comparison with zero.
 define i64 @f8(i64 %a, i64 %b, double %f) {
 ; CHECK-LABEL: f8:
-; CHECK: ltdbr %f0, %f0
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: lzdr %f1
+; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f, 0.0
   %res = select i1 %cond, i64 %a, i64 %b

@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
 define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
 ; CHECK-LABEL: f9:
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: jl {{\.L.*}}
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: jl
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
 ; CHECK: br %r14
   %f1 = load double , double *%ptr
   %cond = fcmp ogt double %f1, %f2


@@ -1,11 +1,15 @@
 ; Test floating-point truncations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 ; Test f64->f32.
 define float @f1(double %d1, double %d2) {
 ; CHECK-LABEL: f1:
-; CHECK: ledbr %f0, %f2
+; CHECK-SCALAR: ledbr %f0, %f2
+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
 ; CHECK: br %r14
   %res = fptrunc double %d2 to float
   ret float %res

@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) {
 define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
 ; CHECK-LABEL: f5:
 ; CHECK: ldxbr %f1, %f1
-; CHECK: adbr %f1, %f2
-; CHECK: std %f1, 0(%r2)
+; CHECK-SCALAR: adbr %f1, %f2
+; CHECK-SCALAR: std %f1, 0(%r2)
+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
+; CHECK-VECTOR: std [[REG]], 0(%r2)
 ; CHECK: br %r14
   %val = load fp128 , fp128 *%ptr
   %conv = fptrunc fp128 %val to double


@@ -1,6 +1,8 @@
 ; Test extensions of f32 to f64.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Check register extension.
 define double @f1(float %val) {

@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %index) {
 ; to use LDEB if possible.
 define void @f7(double *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f7:
-; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
   %val0 = load volatile float , float *%ptr2
   %val1 = load volatile float , float *%ptr2


@@ -1,6 +1,8 @@
 ; Test 64-bit floating-point division.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()

@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: ddb %f0, 160(%r15)
+; CHECK-SCALAR: ddb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4


@@ -1,11 +1,13 @@
 ; Test moves between FPRs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32 moves.
 define float @f1(float %a, float %b) {
 ; CHECK-LABEL: f1:
 ; CHECK: ler %f0, %f2
+; CHECK: br %r14
   ret float %b
 }

@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) {
 define double @f2(double %a, double %b) {
 ; CHECK-LABEL: f2:
 ; CHECK: ldr %f0, %f2
+; CHECK: br %r14
   ret double %b
 }

@@ -22,6 +25,7 @@ define double @f2(double %a, double %b) {
 ; CHECK-LABEL: f3:
 ; CHECK: lxr
 ; CHECK: axbr
+; CHECK: br %r14
   %val = load volatile fp128 , fp128 *%x
   %sum = fadd fp128 %val, %val
   store volatile fp128 %sum, fp128 *%x


@@ -1,6 +1,7 @@
 ; Test 64-bit floating-point loads.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test the low end of the LD range.
 define double @f1(double *%src) {


@@ -1,6 +1,7 @@
 ; Test 64-bit floating-point stores.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test the low end of the STD range.
 define void @f1(double *%src, double %val) {


@@ -0,0 +1,110 @@
; Test 32-bit floating-point loads for z13.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test that we use LDE instead of LE - low end of the LE range.
define float @f1(float *%src) {
; CHECK-LABEL: f1:
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%val = load float, float *%src
ret float %val
}
; Test that we use LDE instead of LE - high end of the LE range.
define float @f2(float *%src) {
; CHECK-LABEL: f2:
; CHECK: lde %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 1023
%val = load float, float *%ptr
ret float %val
}
; Check the next word up, which should use LEY instead of LDE.
define float @f3(float *%src) {
; CHECK-LABEL: f3:
; CHECK: ley %f0, 4096(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 1024
%val = load float, float *%ptr
ret float %val
}
; Check the high end of the aligned LEY range.
define float @f4(float *%src) {
; CHECK-LABEL: f4:
; CHECK: ley %f0, 524284(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 131071
%val = load float, float *%ptr
ret float %val
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f5(float *%src) {
; CHECK-LABEL: f5:
; CHECK: agfi %r2, 524288
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 131072
%val = load float, float *%ptr
ret float %val
}
; Check the high end of the negative aligned LEY range.
define float @f6(float *%src) {
; CHECK-LABEL: f6:
; CHECK: ley %f0, -4(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -1
%val = load float, float *%ptr
ret float %val
}
; Check the low end of the LEY range.
define float @f7(float *%src) {
; CHECK-LABEL: f7:
; CHECK: ley %f0, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -131072
%val = load float, float *%ptr
ret float %val
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f8(float *%src) {
; CHECK-LABEL: f8:
; CHECK: agfi %r2, -524292
; CHECK: lde %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%src, i64 -131073
%val = load float, float *%ptr
ret float %val
}
; Check that LDE allows an index.
define float @f9(i64 %src, i64 %index) {
; CHECK-LABEL: f9:
; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to float *
%val = load float, float *%ptr
ret float %val
}
; Check that LEY allows an index.
define float @f10(i64 %src, i64 %index) {
; CHECK-LABEL: f10:
; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to float *
%val = load float, float *%ptr
ret float %val
}


@@ -1,6 +1,8 @@
 ; Test multiplication of two f64s, producing an f64 result.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()

@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: mdb %f0, 160(%r15)
+; CHECK-SCALAR: mdb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4


@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
 ; CHECK-LABEL: f1:
-; CHECK: madbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
 ; CHECK: br %r14
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res


@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
 ; CHECK-LABEL: f1:
-; CHECK: msdbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
 ; CHECK: br %r14
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)


@@ -1,6 +1,7 @@
 ; Test floating-point negation.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 define float @f1(float %f) {


@@ -1,6 +1,9 @@
 ; Test rounding functions for z196 and above.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 ; Test rint for f32.
 declare float @llvm.rint.f32(float %f)

@@ -16,7 +19,8 @@ define float @f1(float %f) {
 declare double @llvm.rint.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: fidbr %f0, 0, %f0
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
 ; CHECK: br %r14
   %res = call double @llvm.rint.f64(double %f)
   ret double %res


@@ -1,6 +1,8 @@
 ; Test 64-bit square root.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @llvm.sqrt.f64(double %f)
 declare double @sqrt(double)

@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %index) {
 ; to use SQDB if possible.
 define void @f7(double *%ptr) {
 ; CHECK-LABEL: f7:
-; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
   %val0 = load volatile double , double *%ptr
   %val1 = load volatile double , double *%ptr


@@ -1,6 +1,8 @@
 ; Test 64-bit floating-point subtraction.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()

@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: sdb %f0, 16{{[04]}}(%r15)
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4


@@ -2,7 +2,7 @@
 ; uses a different register class, but the set of saved and restored
 ; registers should be the same.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; This function should require all FPRs, but no other spill slots.
 ; We need to save and restore 8 of the 16 FPRs, so the frame size


@@ -1,7 +1,7 @@
 ; Test the saving and restoring of FPRs in large frames.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
 
 ; Test a frame size that requires some FPRs to be saved and loaded using
 ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.


@@ -1,6 +1,6 @@
 ; Test spilling of FPRs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; We need to save and restore 8 of the 16 FPRs and allocate an additional
 ; 4-byte spill slot, rounded to 8 bytes.  The frame size should be exactly


@@ -0,0 +1,445 @@
; Like frame-03.ll, but for z13. In this case we have 16 more registers
; available.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; This function should require all FPRs, but no other spill slots.
; We need to save and restore 8 of the 16 FPRs, so the frame size
; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160
; (the caller-allocated part of the frame) + 224.
define void @f1(double *%ptr) {
; CHECK-LABEL: f1:
; CHECK: aghi %r15, -224
; CHECK: .cfi_def_cfa_offset 384
; CHECK: std %f8, 216(%r15)
; CHECK: std %f9, 208(%r15)
; CHECK: std %f10, 200(%r15)
; CHECK: std %f11, 192(%r15)
; CHECK: std %f12, 184(%r15)
; CHECK: std %f13, 176(%r15)
; CHECK: std %f14, 168(%r15)
; CHECK: std %f15, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK: .cfi_offset %f9, -176
; CHECK: .cfi_offset %f10, -184
; CHECK: .cfi_offset %f11, -192
; CHECK: .cfi_offset %f12, -200
; CHECK: .cfi_offset %f13, -208
; CHECK: .cfi_offset %f14, -216
; CHECK: .cfi_offset %f15, -224
; CHECK-DAG: ld %f0, 0(%r2)
; CHECK-DAG: ld %f7, 0(%r2)
; CHECK-DAG: ld %f8, 0(%r2)
; CHECK-DAG: ld %f15, 0(%r2)
; CHECK-DAG: vlrepg %v16, 0(%r2)
; CHECK-DAG: vlrepg %v23, 0(%r2)
; CHECK-DAG: vlrepg %v24, 0(%r2)
; CHECK-DAG: vlrepg %v31, 0(%r2)
; CHECK: ld %f8, 216(%r15)
; CHECK: ld %f9, 208(%r15)
; CHECK: ld %f10, 200(%r15)
; CHECK: ld %f11, 192(%r15)
; CHECK: ld %f12, 184(%r15)
; CHECK: ld %f13, 176(%r15)
; CHECK: ld %f14, 168(%r15)
; CHECK: ld %f15, 160(%r15)
; CHECK: aghi %r15, 224
; CHECK: br %r14
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l9 = load volatile double, double *%ptr
%l10 = load volatile double, double *%ptr
%l11 = load volatile double, double *%ptr
%l12 = load volatile double, double *%ptr
%l13 = load volatile double, double *%ptr
%l14 = load volatile double, double *%ptr
%l15 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc9 = fsub double %l9, %acc8
%acc10 = fsub double %l10, %acc9
%acc11 = fsub double %l11, %acc10
%acc12 = fsub double %l12, %acc11
%acc13 = fsub double %l13, %acc12
%acc14 = fsub double %l14, %acc13
%acc15 = fsub double %l15, %acc14
%acc16 = fsub double %l16, %acc15
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc9, double *%ptr
store volatile double %acc10, double *%ptr
store volatile double %acc11, double *%ptr
store volatile double %acc12, double *%ptr
store volatile double %acc13, double *%ptr
store volatile double %acc14, double *%ptr
store volatile double %acc15, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; Like f1, but requires one fewer FPR. We allocate in numerical order,
; so %f15 is the one that gets dropped.
define void @f2(double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: aghi %r15, -216
; CHECK: .cfi_def_cfa_offset 376
; CHECK: std %f8, 208(%r15)
; CHECK: std %f9, 200(%r15)
; CHECK: std %f10, 192(%r15)
; CHECK: std %f11, 184(%r15)
; CHECK: std %f12, 176(%r15)
; CHECK: std %f13, 168(%r15)
; CHECK: std %f14, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK: .cfi_offset %f9, -176
; CHECK: .cfi_offset %f10, -184
; CHECK: .cfi_offset %f11, -192
; CHECK: .cfi_offset %f12, -200
; CHECK: .cfi_offset %f13, -208
; CHECK: .cfi_offset %f14, -216
; CHECK-NOT: %v15
; CHECK-NOT: %f15
; CHECK: ld %f8, 208(%r15)
; CHECK: ld %f9, 200(%r15)
; CHECK: ld %f10, 192(%r15)
; CHECK: ld %f11, 184(%r15)
; CHECK: ld %f12, 176(%r15)
; CHECK: ld %f13, 168(%r15)
; CHECK: ld %f14, 160(%r15)
; CHECK: aghi %r15, 216
; CHECK: br %r14
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l9 = load volatile double, double *%ptr
%l10 = load volatile double, double *%ptr
%l11 = load volatile double, double *%ptr
%l12 = load volatile double, double *%ptr
%l13 = load volatile double, double *%ptr
%l14 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc9 = fsub double %l9, %acc8
%acc10 = fsub double %l10, %acc9
%acc11 = fsub double %l11, %acc10
%acc12 = fsub double %l12, %acc11
%acc13 = fsub double %l13, %acc12
%acc14 = fsub double %l14, %acc13
%acc16 = fsub double %l16, %acc14
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc9, double *%ptr
store volatile double %acc10, double *%ptr
store volatile double %acc11, double *%ptr
store volatile double %acc12, double *%ptr
store volatile double %acc13, double *%ptr
store volatile double %acc14, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; Like f1, but should require only one call-saved FPR.
define void @f3(double *%ptr) {
; CHECK-LABEL: f3:
; CHECK: aghi %r15, -168
; CHECK: .cfi_def_cfa_offset 328
; CHECK: std %f8, 160(%r15)
; CHECK: .cfi_offset %f8, -168
; CHECK-NOT: {{%[fv]9}}
; CHECK-NOT: {{%[fv]1[0-5]}}
; CHECK: ld %f8, 160(%r15)
; CHECK: aghi %r15, 168
; CHECK: br %r14
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l8 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc8 = fsub double %l8, %acc7
%acc16 = fsub double %l16, %acc8
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc8, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}
; This function should use all call-clobbered FPRs and vector registers
; but no call-saved ones. It shouldn't need to create a frame.
define void @f4(double *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r15
; CHECK-NOT: {{%[fv][89]}}
; CHECK-NOT: {{%[fv]1[0-5]}}
; CHECK: br %r14
%l0 = load volatile double, double *%ptr
%l1 = load volatile double, double *%ptr
%l2 = load volatile double, double *%ptr
%l3 = load volatile double, double *%ptr
%l4 = load volatile double, double *%ptr
%l5 = load volatile double, double *%ptr
%l6 = load volatile double, double *%ptr
%l7 = load volatile double, double *%ptr
%l16 = load volatile double, double *%ptr
%l17 = load volatile double, double *%ptr
%l18 = load volatile double, double *%ptr
%l19 = load volatile double, double *%ptr
%l20 = load volatile double, double *%ptr
%l21 = load volatile double, double *%ptr
%l22 = load volatile double, double *%ptr
%l23 = load volatile double, double *%ptr
%l24 = load volatile double, double *%ptr
%l25 = load volatile double, double *%ptr
%l26 = load volatile double, double *%ptr
%l27 = load volatile double, double *%ptr
%l28 = load volatile double, double *%ptr
%l29 = load volatile double, double *%ptr
%l30 = load volatile double, double *%ptr
%l31 = load volatile double, double *%ptr
%acc0 = fsub double %l0, %l0
%acc1 = fsub double %l1, %acc0
%acc2 = fsub double %l2, %acc1
%acc3 = fsub double %l3, %acc2
%acc4 = fsub double %l4, %acc3
%acc5 = fsub double %l5, %acc4
%acc6 = fsub double %l6, %acc5
%acc7 = fsub double %l7, %acc6
%acc16 = fsub double %l16, %acc7
%acc17 = fsub double %l17, %acc16
%acc18 = fsub double %l18, %acc17
%acc19 = fsub double %l19, %acc18
%acc20 = fsub double %l20, %acc19
%acc21 = fsub double %l21, %acc20
%acc22 = fsub double %l22, %acc21
%acc23 = fsub double %l23, %acc22
%acc24 = fsub double %l24, %acc23
%acc25 = fsub double %l25, %acc24
%acc26 = fsub double %l26, %acc25
%acc27 = fsub double %l27, %acc26
%acc28 = fsub double %l28, %acc27
%acc29 = fsub double %l29, %acc28
%acc30 = fsub double %l30, %acc29
%acc31 = fsub double %l31, %acc30
store volatile double %acc0, double *%ptr
store volatile double %acc1, double *%ptr
store volatile double %acc2, double *%ptr
store volatile double %acc3, double *%ptr
store volatile double %acc4, double *%ptr
store volatile double %acc5, double *%ptr
store volatile double %acc6, double *%ptr
store volatile double %acc7, double *%ptr
store volatile double %acc16, double *%ptr
store volatile double %acc17, double *%ptr
store volatile double %acc18, double *%ptr
store volatile double %acc19, double *%ptr
store volatile double %acc20, double *%ptr
store volatile double %acc21, double *%ptr
store volatile double %acc22, double *%ptr
store volatile double %acc23, double *%ptr
store volatile double %acc24, double *%ptr
store volatile double %acc25, double *%ptr
store volatile double %acc26, double *%ptr
store volatile double %acc27, double *%ptr
store volatile double %acc28, double *%ptr
store volatile double %acc29, double *%ptr
store volatile double %acc30, double *%ptr
store volatile double %acc31, double *%ptr
ret void
}


@@ -1,7 +1,8 @@
; Test f64 and v2f64 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
; Test a plain absolute.
@@ -22,3 +23,24 @@ define <2 x double> @f2(<2 x double> %val) {
%ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
ret <2 x double> %ret
}
; Test an f64 absolute that uses vector registers.
define double @f3(<2 x double> %val) {
; CHECK-LABEL: f3:
; CHECK: wflpdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = call double @llvm.fabs.f64(double %scalar)
ret double %ret
}
; Test an f64 negative absolute that uses vector registers.
define double @f4(<2 x double> %val) {
; CHECK-LABEL: f4:
; CHECK: wflndb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%abs = call double @llvm.fabs.f64(double %scalar)
%ret = fsub double -0.0, %abs
ret double %ret
}
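; Note: wflpdb and wflndb are the scalar (W-form) variants of the vector
; FP load positive/negative instructions; they operate on the f64 in
; element 0, so abs and negated abs can use any of the 32 vector registers.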


@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fadd <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 addition that uses vector registers.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfadb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fadd double %scalar1, %scalar2
ret double %ret
}
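; Note: wfadb is the W-form of vfadb, operating on the f64 values in
; element 0 of each operand; the same pattern applies to wfsdb, wfmdb and
; wfddb in the tests below. Presumably only element 0 of the result is
; well defined.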


@@ -1,4 +1,4 @@
; Test f64 and v2f64 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
@@ -335,3 +335,15 @@ define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
ret <2 x double> %ret
}
; Test an f64 comparison that uses vector registers.
define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
; CHECK-LABEL: f29:
; CHECK: wfcdb %f0, %v24
; CHECK-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%f2 = extractelement <2 x double> %vec, i32 0
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
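; Note: wfcdb compares the two f64 values and sets the condition code, so
; the select can be lowered to locgrne, which replaces %r2 (%a) with
; %r3 (%b) when the comparands are unequal.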


@@ -11,3 +11,23 @@ define void @f1(<2 x double> %val, <2 x float> *%ptr) {
store <2 x float> %res, <2 x float> *%ptr
ret void
}
; Test conversion of an f64 in a vector register to an f32.
define float @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
; CHECK: wledb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %vec, i32 0
%ret = fptrunc double %scalar to float
ret float %ret
}
; Test conversion of an f32 in a vector register to an f64.
define double @f3(<4 x float> %vec) {
; CHECK-LABEL: f3:
; CHECK: wldeb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <4 x float> %vec, i32 0
%ret = fpext float %scalar to double
ret double %ret
}
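; Note: wledb rounds the f64 in element 0 to f32 and wldeb lengthens the
; f32 in element 0 to f64; both are the W-forms of the corresponding
; vector conversions.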


@@ -70,3 +70,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fdiv <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 division that uses vector registers.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfddb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fdiv double %scalar1, %scalar2
ret double %ret
}


@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
%ret = fmul <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 multiplication that uses vector registers.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfmdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fmul double %scalar1, %scalar2
ret double %ret
}


@@ -46,3 +46,13 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
%ret = fsub <2 x double> <double -0.0, double -0.0>, %val
ret <2 x double> %ret
}
; Test an f64 negation that uses vector registers.
define double @f6(<2 x double> %val) {
; CHECK-LABEL: f6:
; CHECK: wflcdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = fsub double -0.0, %scalar
ret double %ret
}
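; Note: wflcdb is the W-form vector FP load complement; it negates the f64
; in element 0 by flipping its sign bit.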


@@ -2,6 +2,12 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.rint.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.ceil.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.round.f64(double)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
@@ -56,3 +62,57 @@ define <2 x double> @f6(<2 x double> %val) {
%res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
ret <2 x double> %res
}
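; Test an f64 rint that uses vector registers.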
define double @f7(<2 x double> %val) {
; CHECK-LABEL: f7:
; CHECK: wfidb %f0, %v24, 0, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.rint.f64(double %scalar)
ret double %res
}
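; Test an f64 nearbyint that uses vector registers.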
define double @f8(<2 x double> %val) {
; CHECK-LABEL: f8:
; CHECK: wfidb %f0, %v24, 4, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.nearbyint.f64(double %scalar)
ret double %res
}
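; Test an f64 floor that uses vector registers.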
define double @f9(<2 x double> %val) {
; CHECK-LABEL: f9:
; CHECK: wfidb %f0, %v24, 4, 7
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.floor.f64(double %scalar)
ret double %res
}
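; Test an f64 ceil that uses vector registers.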
define double @f10(<2 x double> %val) {
; CHECK-LABEL: f10:
; CHECK: wfidb %f0, %v24, 4, 6
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.ceil.f64(double %scalar)
ret double %res
}
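; Test an f64 trunc that uses vector registers.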
define double @f11(<2 x double> %val) {
; CHECK-LABEL: f11:
; CHECK: wfidb %f0, %v24, 4, 5
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.trunc.f64(double %scalar)
ret double %res
}
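; Test an f64 round that uses vector registers.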
define double @f12(<2 x double> %val) {
; CHECK-LABEL: f12:
; CHECK: wfidb %f0, %v24, 4, 1
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.round.f64(double %scalar)
ret double %res
}
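; Note: the two immediate wfidb operands appear to follow the usual
; z/Architecture BFP encodings: the first mask (4) suppresses the
; IEEE-inexact exception, as every intrinsic here except rint requires,
; and the second selects the rounding mode (0 = current mode, 1 = nearest
; with ties away from zero, 5 = toward zero, 6 = toward +inf,
; 7 = toward -inf).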


@@ -1,7 +1,8 @@
; Test f64 and v2f64 square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define <2 x double> @f1(<2 x double> %val) {
@@ -11,3 +12,12 @@ define <2 x double> @f1(<2 x double> %val) {
%ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
ret <2 x double> %ret
}
define double @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: wfsqdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = call double @llvm.sqrt.f64(double %scalar)
ret double %ret
}
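; Note: wfsqdb is the W-form of vfsqdb, taking the square root of the f64
; in element 0 only.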


@@ -74,3 +74,14 @@ define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
%ret = fsub <2 x double> %val1, %val2
ret <2 x double> %ret
}
; Test an f64 subtraction that uses vector registers.
define double @f7(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f7:
; CHECK: wfsdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = fsub double %scalar1, %scalar2
ret double %ret
}