[SystemZ] Add CodeGen support for integer vector types

This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility.  This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).

When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
  (except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.

The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.

However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.

These alignment rules are implemented not only at the C language level
(in clang), but also at the LLVM IR level.  This is done by selecting a
different DataLayout string depending on whether the vector ABI is in
effect or not.

Based on a patch by Richard Sandiford.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236521 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Ulrich Weigand 2015-05-05 19:25:42 +00:00
parent 1a21909e98
commit aa5c996eda
95 changed files with 10849 additions and 146 deletions

View File

@ -87,6 +87,13 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
// Number of bits in a vector register.
const unsigned VectorBits = 128;
// Number of bytes in a vector register (and consequently the number of
// bytes in a general permute vector).
const unsigned VectorBytes = VectorBits / 8;
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;

View File

@ -151,6 +151,13 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
break;
case SystemZ::VLVGP32:
LoweredMI = MCInstBuilder(SystemZ::VLVGP)
.addReg(MI->getOperand(0).getReg())
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg()))
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
break;
#define LOWER_LOW(NAME) \
case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break

View File

@ -10,6 +10,9 @@
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
namespace llvm {
namespace SystemZ {
const unsigned NumArgGPRs = 5;
@ -18,6 +21,47 @@ namespace SystemZ {
const unsigned NumArgFPRs = 4;
extern const unsigned ArgFPRs[NumArgFPRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
private:
/// Records whether the value was a fixed argument.
/// See ISD::OutputArg::IsFixed.
SmallVector<bool, 4> ArgIsFixed;
public:
SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
: CCState(CC, isVarArg, MF, locs, C) {}
void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
// Formal arguments are always fixed.
ArgIsFixed.clear();
for (unsigned i = 0; i < Ins.size(); ++i)
ArgIsFixed.push_back(true);
CCState::AnalyzeFormalArguments(Ins, Fn);
}
void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Record whether the call operand was a fixed argument.
ArgIsFixed.clear();
for (unsigned i = 0; i < Outs.size(); ++i)
ArgIsFixed.push_back(Outs[i].IsFixed);
CCState::AnalyzeCallOperands(Outs, Fn);
}
// This version of AnalyzeCallOperands in the base class is not usable
// since we must provide a means of accessing ISD::OutputArg::IsFixed.
void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) = delete;
bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
};
} // end namespace llvm
#endif

View File

@ -12,6 +12,15 @@
class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
// Match if the subtarget of the current machine function satisfies F, where
// F is a member expression evaluated on the SystemZSubtarget
// (for example "hasVector()").
class CCIfSubtarget<string F, CCAction A>
: CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
"(State.getMachineFunction().getSubtarget()).", F),
A>;
// Match if this specific argument is a fixed (i.e. named) argument.
class CCIfFixed<CCAction A>
: CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
//===----------------------------------------------------------------------===//
// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
@ -31,7 +40,12 @@ def RetCC_SystemZ : CallingConv<[
// doesn't care about the ABI. All floating-point argument registers
// are call-clobbered, so we can use all of them here.
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
// Similarly for vectors, with V24 being the ABI-compliant choice.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
// ABI-compliant code returns long double by reference, but that conversion
// is left to higher-level code. Perhaps we could add an f128 definition
@ -60,6 +74,17 @@ def CC_SystemZ : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
// The first 8 named vector arguments are passed in V24-V31.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64],
CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
V25, V27, V29, V31]>>>>,
// Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64],
CCAssignToStack<16, 8>>>,
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;

View File

@ -255,6 +255,13 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
Addr, Base, Disp, Index);
}
// Try to match Addr as an address with a base, 12-bit displacement
// and index, where the index is element Elem of a vector.
// Return true on success, storing the base, displacement and vector
// in Base, Disp and Index respectively.
bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
SDValue &Disp, SDValue &Index) const;
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op. Return true if so and
// set Op to that Y.
@ -292,6 +299,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
// Try to use gather instruction Opcode to implement vector insertion N.
SDNode *tryGather(SDNode *N, unsigned Opcode);
// Try to use scatter instruction Opcode to implement store Store.
SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
// Return true if Load and Store are loads and stores of the same size
// and are guaranteed not to overlap. Such operations can be implemented
// using block (SS-format) instructions.
@ -645,6 +658,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
return true;
}
bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
                                              SDValue &Base,
                                              SDValue &Disp,
                                              SDValue &Index) const {
  // Start from an ordinary base + 12-bit displacement + index match.
  // Both register operands must be present, since one of them has to
  // turn out to be the vector element.
  SDValue Regs[2];
  if (!selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]))
    return false;
  if (!Regs[0].getNode() || !Regs[1].getNode())
    return false;

  // Try each register as the base in turn, treating the other one as the
  // candidate index.
  for (unsigned BaseNo = 0; BaseNo != 2; ++BaseNo) {
    Base = Regs[BaseNo];
    Index = Regs[1 - BaseNo];
    // Whether the index vector has the right type for the access cannot be
    // decided here; that check is left to the caller.
    if (Index.getOpcode() == ISD::ZERO_EXTEND)
      Index = Index.getOperand(0);
    const bool ExtractsElem = Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
                              Index.getOperand(1) == Elem;
    if (!ExtractsElem)
      continue;
    // Hand back the vector the element is extracted from.
    Index = Index.getOperand(0);
    return true;
  }
  return false;
}
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
uint64_t InsertMask) const {
// We're only interested in cases where the insertion is into some operand
@ -984,6 +1021,71 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
return Or.getNode();
}
// Attempt to select vector insertion N as gather instruction Opcode.
// Returns the new machine node on success and null if N does not have
// the required form.
SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
  // The element number has to be a constant that is in range for the
  // result vector.
  SDValue ElemV = N->getOperand(2);
  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
  if (!ElemN)
    return nullptr;
  EVT VT = N->getValueType(0);
  unsigned Elem = ElemN->getZExtValue();
  if (Elem >= VT.getVectorNumElements())
    return nullptr;

  // The inserted value has to be a load with a single use whose memory
  // size equals the size of the value it produces.
  auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
  if (!Load)
    return nullptr;
  if (!Load->hasOneUse())
    return nullptr;
  if (Load->getMemoryVT().getSizeInBits() !=
      Load->getValueType(0).getSizeInBits())
    return nullptr;

  // The address has to use element Elem of a vector as its index, and that
  // vector has to be the integer equivalent of the result type.
  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index))
    return nullptr;
  if (Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return nullptr;

  SDLoc DL(Load);
  SDValue Ops[] = {
    N->getOperand(0), Base, Disp, Index,
    CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
  };
  SDNode *Gather = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
  // Users of result 1 of the load now depend on result 1 of the gather.
  ReplaceUses(SDValue(Load, 1), SDValue(Gather, 1));
  return Gather;
}
// Attempt to select store Store as scatter instruction Opcode.
// Returns the new machine node on success and null if Store does not
// have the required form.
SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
  // The stored value has to be a vector element extraction, and the size
  // written to memory has to equal the size of that value.
  SDValue Value = Store->getValue();
  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return nullptr;
  if (Store->getMemoryVT().getSizeInBits() !=
      Value.getValueType().getSizeInBits())
    return nullptr;

  // The element number has to be a constant that is in range for the
  // source vector.
  SDValue ElemV = Value.getOperand(1);
  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
  if (!ElemN)
    return nullptr;
  SDValue Vec = Value.getOperand(0);
  EVT VT = Vec.getValueType();
  unsigned Elem = ElemN->getZExtValue();
  if (Elem >= VT.getVectorNumElements())
    return nullptr;

  // The address has to use element Elem of a vector as its index, and that
  // vector has to be the integer equivalent of the source vector type.
  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index))
    return nullptr;
  if (Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return nullptr;

  SDLoc DL(Store);
  SDValue Ops[] = {
    Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
    Store->getChain()
  };
  return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
}
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
LoadSDNode *Load) const {
// Check that the two memory operands have the same size.
@ -1120,6 +1222,26 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
EVT VT = Node->getValueType(0);
unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
if (ElemBitSize == 32)
ResNode = tryGather(Node, SystemZ::VGEF);
else if (ElemBitSize == 64)
ResNode = tryGather(Node, SystemZ::VGEG);
break;
}
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(Node);
unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
if (ElemBitSize == 32)
ResNode = tryScatter(Store, SystemZ::VSCEF);
else if (ElemBitSize == 64)
ResNode = tryScatter(Store, SystemZ::VSCEG);
break;
}
}
// Select the default instruction

File diff suppressed because it is too large Load Diff

View File

@ -155,6 +155,70 @@ enum {
// Transaction end. Just the chain operand. Returns chain and glue.
TEND,
// Create a vector constant by filling byte N of the result with bit
// 15-N of the single operand.
BYTE_MASK,
// Create a vector constant by replicating an element-sized RISBG-style mask.
// The first operand specifies the starting set bit and the second operand
// specifies the ending set bit. Both operands count from the MSB of the
// element.
ROTATE_MASK,
// Replicate a GPR scalar value into all elements of a vector.
REPLICATE,
// Create a vector from two i64 GPRs.
JOIN_DWORDS,
// Replicate one element of a vector into all elements. The first operand
// is the vector and the second is the index of the element to replicate.
SPLAT,
// Interleave elements from the high half of operand 0 and the high half
// of operand 1.
MERGE_HIGH,
// Likewise for the low halves.
MERGE_LOW,
// Concatenate the vectors in the first two operands, shift them left
// by the third operand, and take the first half of the result.
SHL_DOUBLE,
// Take one element of the first v2i64 operand and one element of
// the second v2i64 operand and concatenate them to form a v2i64 result.
// The third operand is a 4-bit value of the form 0A0B, where A and B
// are the element selectors for the first and second operands
// respectively.
PERMUTE_DWORDS,
// Perform a general vector permute on vector operands 0 and 1.
// Each byte of operand 2 controls the corresponding byte of the result,
// in the same way as a byte-level VECTOR_SHUFFLE mask.
PERMUTE,
// Pack vector operands 0 and 1 into a single vector with half-sized elements.
PACK,
// Shift each element of vector operand 0 by the number of bits specified
// by scalar operand 1.
VSHL_BY_SCALAR,
VSRL_BY_SCALAR,
VSRA_BY_SCALAR,
// For each element of the output type, sum across all sub-elements of
// operand 0 belonging to the corresponding element, and add in the
// rightmost sub-element of the corresponding element of operand 1.
VSUM,
// Compare integer vector operands 0 and 1 to produce the usual 0/-1
// vector result. VICMPE is for equality, VICMPH for "signed greater than"
// and VICMPHL for "unsigned greater than".
VICMPE,
VICMPH,
VICMPHL,
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@ -222,6 +286,11 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override {
return MVT::i32;
}
// Use i32 as the type of vector element indices.
MVT getVectorIdxTy() const override {
// Only the lower 12 bits of an element index are used, so we don't
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
}
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
@ -328,6 +397,16 @@ private:
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
unsigned Index, DAGCombinerInfo &DCI,
bool Force) const;
SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.

View File

@ -2414,6 +2414,10 @@ class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
let Constraints = "$R1 = $R1src";
}
// An alias of a BinaryVRRf, but with different register sizes.
class BinaryAliasVRRf<RegisterOperand cls>
: Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
// An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>

View File

@ -578,6 +578,8 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@ -1116,6 +1118,10 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
} else if (RC == &SystemZ::VF128BitRegClass ||
RC == &SystemZ::VR128BitRegClass) {
LoadOpcode = SystemZ::VL;
StoreOpcode = SystemZ::VST;
} else
llvm_unreachable("Unsupported regclass to load or store");
}
@ -1185,6 +1191,7 @@ static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
unsigned &Start, unsigned &End) const {
// Reject trivial all-zero masks.
Mask &= allOnes(BitSize);
if (Mask == 0)
return false;

View File

@ -19,18 +19,34 @@ let Predicates = [FeatureVector] in {
def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
def VLGVG : BinaryVRSc<"vlgvg", 0xE721, null_frag, v128g, 3>;
def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
// Load VR element from GR.
def VLVGB : TernaryVRSb<"vlvgb", 0xE722, null_frag, v128b, v128b, GR32, 0>;
def VLVGH : TernaryVRSb<"vlvgh", 0xE722, null_frag, v128h, v128h, GR32, 1>;
def VLVGF : TernaryVRSb<"vlvgf", 0xE722, null_frag, v128f, v128f, GR32, 2>;
def VLVGG : TernaryVRSb<"vlvgg", 0xE722, null_frag, v128g, v128g, GR64, 3>;
def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
v128b, v128b, GR32, 0>;
def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
v128h, v128h, GR32, 1>;
def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert,
v128f, v128f, GR32, 2>;
def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert,
v128g, v128g, GR64, 3>;
// Load VR from GRs disjoint.
def VLVGP : BinaryVRRf<"vlvgp", 0xE762, null_frag, v128g>;
def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>;
def VLVGP32 : BinaryAliasVRRf<GR32>;
}
// Extractions always assign to the full GR64, even if the element would
// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a
// subreg of the result.
class VectorExtractSubreg<ValueType type, Instruction insn>
: Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)),
(EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>;
def : VectorExtractSubreg<v16i8, VLGVB>;
def : VectorExtractSubreg<v8i16, VLGVH>;
def : VectorExtractSubreg<v4i32, VLGVF>;
//===----------------------------------------------------------------------===//
// Immediate instructions
//===----------------------------------------------------------------------===//
@ -39,29 +55,38 @@ let Predicates = [FeatureVector] in {
// Generate byte mask.
def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
def VGBM : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>;
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
// Generate mask.
def VGMB : BinaryVRIb<"vgmb", 0xE746, null_frag, v128b, 0>;
def VGMH : BinaryVRIb<"vgmh", 0xE746, null_frag, v128h, 1>;
def VGMF : BinaryVRIb<"vgmf", 0xE746, null_frag, v128f, 2>;
def VGMG : BinaryVRIb<"vgmg", 0xE746, null_frag, v128g, 3>;
def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
// Load element immediate.
def VLEIB : TernaryVRIa<"vleib", 0xE740, null_frag,
v128b, v128b, imm32sx16trunc, imm32zx4>;
def VLEIH : TernaryVRIa<"vleih", 0xE741, null_frag,
v128h, v128h, imm32sx16trunc, imm32zx3>;
def VLEIF : TernaryVRIa<"vleif", 0xE743, null_frag,
v128f, v128f, imm32sx16, imm32zx2>;
def VLEIG : TernaryVRIa<"vleig", 0xE742, null_frag,
v128g, v128g, imm64sx16, imm32zx1>;
//
// We want these instructions to be used ahead of VLVG* where possible.
// However, VLVG* takes a variable BD-format index whereas VLEI takes
// a plain immediate index. This means that VLVG* has an extra "base"
// register operand and is 3 units more complex. Bumping the complexity
// of the VLEI* instructions by 4 means that they are strictly better
// than VLVG* in cases where both forms match.
let AddedComplexity = 4 in {
def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert,
v128b, v128b, imm32sx16trunc, imm32zx4>;
def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert,
v128h, v128h, imm32sx16trunc, imm32zx3>;
def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert,
v128f, v128f, imm32sx16, imm32zx2>;
def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
v128g, v128g, imm64sx16, imm32zx1>;
}
// Replicate immediate.
def VREPIB : UnaryVRIa<"vrepib", 0xE745, null_frag, v128b, imm32sx16, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, null_frag, v128h, imm32sx16, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, null_frag, v128f, imm32sx16, 2>;
def VREPIG : UnaryVRIa<"vrepig", 0xE745, null_frag, v128g, imm32sx16, 3>;
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
}
//===----------------------------------------------------------------------===//
@ -89,28 +114,45 @@ let Predicates = [FeatureVector] in {
def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
// Load and replicate
def VLREPB : UnaryVRX<"vlrepb", 0xE705, null_frag, v128b, 1, 0>;
def VLREPH : UnaryVRX<"vlreph", 0xE705, null_frag, v128h, 2, 1>;
def VLREPF : UnaryVRX<"vlrepf", 0xE705, null_frag, v128f, 4, 2>;
def VLREPG : UnaryVRX<"vlrepg", 0xE705, null_frag, v128g, 8, 3>;
def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>;
def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
// Load logical element and zero.
def VLLEZB : UnaryVRX<"vllezb", 0xE704, null_frag, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, null_frag, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, null_frag, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, null_frag, v128g, 8, 3>;
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
// Load element.
def VLEB : TernaryVRX<"vleb", 0xE700, null_frag, v128b, v128b, 1, imm32zx4>;
def VLEH : TernaryVRX<"vleh", 0xE701, null_frag, v128h, v128h, 2, imm32zx3>;
def VLEF : TernaryVRX<"vlef", 0xE703, null_frag, v128f, v128f, 4, imm32zx2>;
def VLEG : TernaryVRX<"vleg", 0xE702, null_frag, v128g, v128g, 8, imm32zx1>;
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
// Gather element.
def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
}
// Use replicating loads if we're inserting a single element into an
// undefined vector. This avoids a false dependency on the previous
// register contents.
multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
SDPatternOperator load, ValueType scalartype> {
def : Pat<(vectype (z_vector_insert
(undef), (scalartype (load bdxaddr12only:$addr)), 0)),
(vlrep bdxaddr12only:$addr)>;
def : Pat<(vectype (scalar_to_vector
(scalartype (load bdxaddr12only:$addr)))),
(vlrep bdxaddr12only:$addr)>;
}
defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
//===----------------------------------------------------------------------===//
// Stores
//===----------------------------------------------------------------------===//
@ -126,10 +168,10 @@ let Predicates = [FeatureVector] in {
def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
// Store element.
def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, null_frag, v128b, 1, imm32zx4>;
def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, null_frag, v128h, 2, imm32zx3>;
def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, null_frag, v128f, 4, imm32zx2>;
def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, null_frag, v128g, 8, imm32zx1>;
def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
// Scatter element.
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
@ -142,28 +184,28 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Merge high.
def VMRHB : BinaryVRRc<"vmrhb", 0xE761, null_frag, v128b, v128b, 0>;
def VMRHH : BinaryVRRc<"vmrhh", 0xE761, null_frag, v128h, v128h, 1>;
def VMRHF : BinaryVRRc<"vmrhf", 0xE761, null_frag, v128f, v128f, 2>;
def VMRHG : BinaryVRRc<"vmrhg", 0xE761, null_frag, v128g, v128g, 3>;
def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
// Merge low.
def VMRLB : BinaryVRRc<"vmrlb", 0xE760, null_frag, v128b, v128b, 0>;
def VMRLH : BinaryVRRc<"vmrlh", 0xE760, null_frag, v128h, v128h, 1>;
def VMRLF : BinaryVRRc<"vmrlf", 0xE760, null_frag, v128f, v128f, 2>;
def VMRLG : BinaryVRRc<"vmrlg", 0xE760, null_frag, v128g, v128g, 3>;
def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
// Permute.
def VPERM : TernaryVRRe<"vperm", 0xE78C, null_frag, v128b, v128b>;
def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
// Permute doubleword immediate.
def VPDI : TernaryVRRc<"vpdi", 0xE784, null_frag, v128b, v128b>;
def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
// Replicate.
def VREPB : BinaryVRIc<"vrepb", 0xE74D, null_frag, v128b, v128b, 0>;
def VREPH : BinaryVRIc<"vreph", 0xE74D, null_frag, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, null_frag, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, null_frag, v128g, v128g, 3>;
def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
// Select.
def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
@ -175,9 +217,9 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Pack
def VPKH : BinaryVRRc<"vpkh", 0xE794, null_frag, v128b, v128h, 1>;
def VPKF : BinaryVRRc<"vpkf", 0xE794, null_frag, v128h, v128f, 2>;
def VPKG : BinaryVRRc<"vpkg", 0xE794, null_frag, v128f, v128g, 3>;
def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
// Pack saturate.
defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, null_frag, null_frag,
@ -196,9 +238,12 @@ let Predicates = [FeatureVector] in {
v128f, v128g, 3>;
// Sign-extend to doubleword.
def VSEGB : UnaryVRRa<"vsegb", 0xE75F, null_frag, v128g, v128b, 0>;
def VSEGH : UnaryVRRa<"vsegh", 0xE75F, null_frag, v128g, v128h, 1>;
def VSEGF : UnaryVRRa<"vsegf", 0xE75F, null_frag, v128g, v128f, 2>;
def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>;
def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>;
def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
// Unpack high.
def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, null_frag, v128h, v128b, 0>;
@ -221,16 +266,38 @@ let Predicates = [FeatureVector] in {
def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, null_frag, v128g, v128f, 2>;
}
//===----------------------------------------------------------------------===//
// Instantiating generic operations for specific types.
//===----------------------------------------------------------------------===//
// Patterns that have the same shape for every vector type.  `type` is the
// vector type being instantiated and `inttype` is the type of the mask
// operand in the vselect patterns.
multiclass GenericVectorOps<ValueType type, ValueType inttype> {
let Predicates = [FeatureVector] in {
// Whole-vector loads and stores select VL and VST.
def : Pat<(type (load bdxaddr12only:$addr)),
(VL bdxaddr12only:$addr)>;
def : Pat<(store (type VR128:$src), bdxaddr12only:$addr),
(VST VR128:$src, bdxaddr12only:$addr)>;
// vselect selects VSEL, with the mask passed as the last operand.
def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)),
(VSEL VR128:$y, VR128:$z, VR128:$x)>;
// For an inverted mask (z_vnot), swap the two data operands and pass the
// uninverted mask to VSEL.
def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)),
(VSEL VR128:$z, VR128:$y, VR128:$x)>;
}
}
// Instantiate the generic patterns for each native integer vector type.
defm : GenericVectorOps<v16i8, v16i8>;
defm : GenericVectorOps<v8i16, v8i16>;
defm : GenericVectorOps<v4i32, v4i32>;
defm : GenericVectorOps<v2i64, v2i64>;
//===----------------------------------------------------------------------===//
// Integer arithmetic
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
// Add.
def VAB : BinaryVRRc<"vab", 0xE7F3, null_frag, v128b, v128b, 0>;
def VAH : BinaryVRRc<"vah", 0xE7F3, null_frag, v128h, v128h, 1>;
def VAF : BinaryVRRc<"vaf", 0xE7F3, null_frag, v128f, v128f, 2>;
def VAG : BinaryVRRc<"vag", 0xE7F3, null_frag, v128g, v128g, 3>;
def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
def VAQ : BinaryVRRc<"vaq", 0xE7F3, null_frag, v128q, v128q, 4>;
// Add compute carry.
@ -268,16 +335,16 @@ let Predicates = [FeatureVector] in {
def VCKSM : BinaryVRRc<"vcksm", 0xE766, null_frag, v128any, v128any>;
// Count leading zeros.
def VCLZB : UnaryVRRa<"vclzb", 0xE753, null_frag, v128b, v128b, 0>;
def VCLZH : UnaryVRRa<"vclzh", 0xE753, null_frag, v128h, v128h, 1>;
def VCLZF : UnaryVRRa<"vclzf", 0xE753, null_frag, v128f, v128f, 2>;
def VCLZG : UnaryVRRa<"vclzg", 0xE753, null_frag, v128g, v128g, 3>;
def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
// Count trailing zeros.
def VCTZB : UnaryVRRa<"vctzb", 0xE752, null_frag, v128b, v128b, 0>;
def VCTZH : UnaryVRRa<"vctzh", 0xE752, null_frag, v128h, v128h, 1>;
def VCTZF : UnaryVRRa<"vctzf", 0xE752, null_frag, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, null_frag, v128g, v128g, 3>;
def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
// Exclusive or.
def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
@ -295,16 +362,16 @@ let Predicates = [FeatureVector] in {
def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, null_frag, v128g, v128g, 3>;
// Load complement.
def VLCB : UnaryVRRa<"vlcb", 0xE7DE, null_frag, v128b, v128b, 0>;
def VLCH : UnaryVRRa<"vlch", 0xE7DE, null_frag, v128h, v128h, 1>;
def VLCF : UnaryVRRa<"vlcf", 0xE7DE, null_frag, v128f, v128f, 2>;
def VLCG : UnaryVRRa<"vlcg", 0xE7DE, null_frag, v128g, v128g, 3>;
def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
// Load positive.
def VLPB : UnaryVRRa<"vlpb", 0xE7DF, null_frag, v128b, v128b, 0>;
def VLPH : UnaryVRRa<"vlph", 0xE7DF, null_frag, v128h, v128h, 1>;
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, null_frag, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, null_frag, v128g, v128g, 3>;
def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
// Maximum.
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
@ -331,9 +398,9 @@ let Predicates = [FeatureVector] in {
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
// Multiply and add low.
def VMALB : TernaryVRRd<"vmalb", 0xE7AA, null_frag, v128b, v128b, 0>;
def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, null_frag, v128h, v128h, 1>;
def VMALF : TernaryVRRd<"vmalf", 0xE7AA, null_frag, v128f, v128f, 2>;
def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
// Multiply and add high.
def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, null_frag, v128b, v128b, 0>;
@ -376,9 +443,9 @@ let Predicates = [FeatureVector] in {
def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, null_frag, v128f, v128f, 2>;
// Multiply low.
def VMLB : BinaryVRRc<"vmlb", 0xE7A2, null_frag, v128b, v128b, 0>;
def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, null_frag, v128h, v128h, 1>;
def VMLF : BinaryVRRc<"vmlf", 0xE7A2, null_frag, v128f, v128f, 2>;
def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
// Multiply even.
def VMEB : BinaryVRRc<"vmeb", 0xE7A6, null_frag, v128h, v128b, 0>;
@ -408,6 +475,7 @@ let Predicates = [FeatureVector] in {
// Population count.
def VPOPCT : BinaryVRRa<"vpopct", 0xE750>;
def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
// Element rotate left logical (with vector shift amount).
def VERLLVB : BinaryVRRc<"verllvb", 0xE773, null_frag, v128b, v128b, 0>;
@ -428,40 +496,40 @@ let Predicates = [FeatureVector] in {
def VERIMG : QuaternaryVRId<"verimg", 0xE772, null_frag, v128g, v128g, 3>;
// Element shift left (with vector shift amount).
def VESLVB : BinaryVRRc<"veslvb", 0xE770, null_frag, v128b, v128b, 0>;
def VESLVH : BinaryVRRc<"veslvh", 0xE770, null_frag, v128h, v128h, 1>;
def VESLVF : BinaryVRRc<"veslvf", 0xE770, null_frag, v128f, v128f, 2>;
def VESLVG : BinaryVRRc<"veslvg", 0xE770, null_frag, v128g, v128g, 3>;
def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
// Element shift left (with scalar shift amount).
def VESLB : BinaryVRSa<"veslb", 0xE730, null_frag, v128b, v128b, 0>;
def VESLH : BinaryVRSa<"veslh", 0xE730, null_frag, v128h, v128h, 1>;
def VESLF : BinaryVRSa<"veslf", 0xE730, null_frag, v128f, v128f, 2>;
def VESLG : BinaryVRSa<"veslg", 0xE730, null_frag, v128g, v128g, 3>;
def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
// Element shift right arithmetic (with vector shift amount).
def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, null_frag, v128b, v128b, 0>;
def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, null_frag, v128h, v128h, 1>;
def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, null_frag, v128f, v128f, 2>;
def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, null_frag, v128g, v128g, 3>;
def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
// Element shift right arithmetic (with scalar shift amount).
def VESRAB : BinaryVRSa<"vesrab", 0xE73A, null_frag, v128b, v128b, 0>;
def VESRAH : BinaryVRSa<"vesrah", 0xE73A, null_frag, v128h, v128h, 1>;
def VESRAF : BinaryVRSa<"vesraf", 0xE73A, null_frag, v128f, v128f, 2>;
def VESRAG : BinaryVRSa<"vesrag", 0xE73A, null_frag, v128g, v128g, 3>;
def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
// Element shift right logical (with vector shift amount).
def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, null_frag, v128b, v128b, 0>;
def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, null_frag, v128h, v128h, 1>;
def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, null_frag, v128f, v128f, 2>;
def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, null_frag, v128g, v128g, 3>;
def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
// Element shift right logical (with scalar shift amount).
def VESRLB : BinaryVRSa<"vesrlb", 0xE738, null_frag, v128b, v128b, 0>;
def VESRLH : BinaryVRSa<"vesrlh", 0xE738, null_frag, v128h, v128h, 1>;
def VESRLF : BinaryVRSa<"vesrlf", 0xE738, null_frag, v128f, v128f, 2>;
def VESRLG : BinaryVRSa<"vesrlg", 0xE738, null_frag, v128g, v128g, 3>;
def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
// Shift left.
def VSL : BinaryVRRc<"vsl", 0xE774, null_frag, v128b, v128b>;
@ -470,7 +538,7 @@ let Predicates = [FeatureVector] in {
def VSLB : BinaryVRRc<"vslb", 0xE775, null_frag, v128b, v128b>;
// Shift left double by byte.
def VSLDB : TernaryVRId<"vsldb", 0xE777, null_frag, v128b, v128b, 0>;
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
// Shift right arithmetic.
def VSRA : BinaryVRRc<"vsra", 0xE77E, null_frag, v128b, v128b>;
@ -485,10 +553,10 @@ let Predicates = [FeatureVector] in {
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, null_frag, v128b, v128b>;
// Subtract.
def VSB : BinaryVRRc<"vsb", 0xE7F7, null_frag, v128b, v128b, 0>;
def VSH : BinaryVRRc<"vsh", 0xE7F7, null_frag, v128h, v128h, 1>;
def VSF : BinaryVRRc<"vsf", 0xE7F7, null_frag, v128f, v128f, 2>;
def VSG : BinaryVRRc<"vsg", 0xE7F7, null_frag, v128g, v128g, 3>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
def VSQ : BinaryVRRc<"vsq", 0xE7F7, null_frag, v128q, v128q, 4>;
// Subtract compute borrow indication.
@ -505,18 +573,107 @@ let Predicates = [FeatureVector] in {
def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, null_frag, v128q, v128q, 4>;
// Sum across doubleword.
def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, null_frag, v128g, v128h, 1>;
def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, null_frag, v128g, v128f, 2>;
def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
// Sum across quadword.
def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, null_frag, v128q, v128f, 2>;
def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, null_frag, v128q, v128g, 3>;
def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
// Sum across word.
def VSUMB : BinaryVRRc<"vsumb", 0xE764, null_frag, v128f, v128b, 0>;
def VSUMH : BinaryVRRc<"vsumh", 0xE764, null_frag, v128f, v128h, 1>;
def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
}
// Instantiate the bitwise ops for type TYPE.  Every pattern operates on
// whole VR128 registers, so the same instruction is selected regardless of
// the element type; the multiclass is instantiated once per integer vector
// type below.
multiclass BitwiseVectorOps<ValueType type> {
let Predicates = [FeatureVector] in {
// x & y
def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
// x & ~y
def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
(VNC VR128:$x, VR128:$y)>;
// x | y
def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
// x ^ y
def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
// Bitwise select: (x & z) | (y & ~z), i.e. take the bits of x where z is
// set and the bits of y elsewhere.
def : Pat<(type (or (and VR128:$x, VR128:$z),
(and VR128:$y, (z_vnot VR128:$z)))),
(VSEL VR128:$x, VR128:$y, VR128:$z)>;
// ~(x | y)
def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
(VNO VR128:$x, VR128:$y)>;
// ~x, expressed as ~(x | x).
def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
}
}
defm : BitwiseVectorOps<v16i8>;
defm : BitwiseVectorOps<v8i16>;
defm : BitwiseVectorOps<v4i32>;
defm : BitwiseVectorOps<v2i64>;
// Instantiate additional patterns for absolute-related expressions on
// type TYPE.  LC is the negate instruction for TYPE and LP is the absolute
// instruction.  SHIFT is the arithmetic right-shift count used by the
// shift-based forms to build a per-element sign mask; the instantiations
// below pass the element width in bits minus one (7, 15, 31, 63).
multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc,
Instruction lp, int shift> {
let Predicates = [FeatureVector] in {
// (x > 0) ? -x : x  ==>  -|x|
def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)),
(z_vneg VR128:$x), VR128:$x)),
(lc (lp VR128:$x))>;
// !(x > 0) ? x : -x  ==>  -|x|
def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))),
VR128:$x, (z_vneg VR128:$x))),
(lc (lp VR128:$x))>;
// (x < 0) ? x : -x  ==>  -|x|
def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)),
VR128:$x, (z_vneg VR128:$x))),
(lc (lp VR128:$x))>;
// !(x < 0) ? -x : x  ==>  -|x|
def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))),
(z_vneg VR128:$x), VR128:$x)),
(lc (lp VR128:$x))>;
// Mask-based select with m = x >> SHIFT (arithmetic):
// (m & -x) | (~m & x)  ==>  |x|
def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
(z_vneg VR128:$x)),
(and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
VR128:$x))),
(lp VR128:$x)>;
// (m & x) | (~m & -x)  ==>  -|x|
def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
VR128:$x),
(and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
(z_vneg VR128:$x)))),
(lc (lp VR128:$x))>;
}
}
defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>;
defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
// Instantiate minimum- and maximum-related patterns for TYPE.  CMPH is the
// signed or unsigned "set if greater than" comparison operator and
// MIN and MAX are the associated minimum and maximum instructions.
// Both the direct and the inverted form of the comparison are matched.
multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
Instruction min, Instruction max> {
let Predicates = [FeatureVector] in {
// (x > y) ? x : y  ==>  max(x, y)
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
(max VR128:$x, VR128:$y)>;
// (x > y) ? y : x  ==>  min(x, y)
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
(min VR128:$x, VR128:$y)>;
// !(x > y) ? x : y  ==>  min(x, y)
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
VR128:$x, VR128:$y)),
(min VR128:$x, VR128:$y)>;
// !(x > y) ? y : x  ==>  max(x, y)
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
VR128:$y, VR128:$x)),
(max VR128:$x, VR128:$y)>;
}
}
// Signed min/max.
defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
// Unsigned min/max.
defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
//===----------------------------------------------------------------------===//
// Integer comparison
//===----------------------------------------------------------------------===//
@ -539,33 +696,33 @@ let Predicates = [FeatureVector] in {
}
// Compare equal.
defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, null_frag, null_frag,
defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, null_frag,
v128b, v128b, 0>;
defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, null_frag, null_frag,
defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, null_frag,
v128h, v128h, 1>;
defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, null_frag, null_frag,
defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, null_frag,
v128f, v128f, 2>;
defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, null_frag, null_frag,
defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, null_frag,
v128g, v128g, 3>;
// Compare high.
defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, null_frag, null_frag,
defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, null_frag,
v128b, v128b, 0>;
defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, null_frag, null_frag,
defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, null_frag,
v128h, v128h, 1>;
defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, null_frag, null_frag,
defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, null_frag,
v128f, v128f, 2>;
defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, null_frag, null_frag,
defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, null_frag,
v128g, v128g, 3>;
// Compare high logical.
defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, null_frag, null_frag,
defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, null_frag,
v128b, v128b, 0>;
defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, null_frag, null_frag,
defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, null_frag,
v128h, v128h, 1>;
defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, null_frag, null_frag,
defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, null_frag,
v128f, v128f, 2>;
defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, null_frag, null_frag,
defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, null_frag,
v128g, v128g, 3>;
// Test under mask.
@ -685,6 +842,44 @@ let Predicates = [FeatureVector] in {
v64g, v64db, 3, 8>;
}
//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//
// Bitcasts between the integer vector types select to no instruction at
// all: the result is the same VR128 register, merely given the new type.
// One pattern is needed for every ordered pair of distinct types.
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
//===----------------------------------------------------------------------===//
// Replicating scalars
//===----------------------------------------------------------------------===//
// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
// The scalar is first moved into a vector register with VLVGP32 (passing it
// as both operands) and that register is then handed to INSN together with
// the immediate INDEX.  INDEX is the number of TYPE elements in a doubleword
// minus one, i.e. (8 / element size in bytes) - 1: 7 for bytes, 3 for
// halfwords and 1 for words.
class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
: Pat<(type (z_replicate GR32:$scalar)),
(insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
def : VectorReplicateScalar<v16i8, VREPB, 7>;
def : VectorReplicateScalar<v8i16, VREPH, 3>;
def : VectorReplicateScalar<v4i32, VREPF, 1>;
// i64 replications are just a single instruction.
def : Pat<(v2i64 (z_replicate GR64:$scalar)),
(VLVGP GR64:$scalar, GR64:$scalar)>;
//===----------------------------------------------------------------------===//
// String instructions
//===----------------------------------------------------------------------===//

View File

@ -82,6 +82,45 @@ def SDT_ZPrefetch : SDTypeProfile<0, 2,
def SDT_ZTBegin : SDTypeProfile<0, 2,
[SDTCisPtrTy<0>,
SDTCisVT<1, i32>]>;
// Type profiles for the vector nodes.  In SDTypeProfile<R, N, [...]> R is
// the number of results and N the number of operands; constraint indices
// count the results first, so index 0 is the result and 1..N the operands.

// Vector result; operand 1 has the result type; operand 3 is an i32.
// Operand 2 is left unconstrained.
def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisVT<3, i32>]>;
// Unconstrained result; operand 1 is a vector; operand 2 is an i32.
def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
[SDTCisVec<1>,
SDTCisVT<2, i32>]>;
// Vector result from a single unconstrained operand.
def SDT_ZReplicate : SDTypeProfile<1, 1,
[SDTCisVec<0>]>;
// Vector result; both operands have the result type.
def SDT_ZVecBinary : SDTypeProfile<1, 2,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
// Vector result; operand 1 has the result type; operand 2 is an i32.
def SDT_ZVecBinaryInt : SDTypeProfile<1, 2,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
// Vector result; the two operands are vectors of one common type, which
// is not required to match the result type.
def SDT_ZVecBinaryConv : SDTypeProfile<1, 2,
[SDTCisVec<0>,
SDTCisVec<1>,
SDTCisSameAs<1, 2>]>;
// Vector result from two i32 operands.
def SDT_ZRotateMask : SDTypeProfile<1, 2,
[SDTCisVec<0>,
SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
// v2i64 result from two i64 operands.
def SDT_ZJoinDwords : SDTypeProfile<1, 2,
[SDTCisVT<0, v2i64>,
SDTCisVT<1, i64>,
SDTCisVT<2, i64>]>;
// Vector result; all three operands have the result type.
def SDT_ZVecTernary : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
// Vector result; operands 1 and 2 have the result type; operand 3 is an i32.
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisVT<3, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@ -134,6 +173,34 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
[SDNPHasChain, SDNPMayStore]>;
// Target-specific views of the generic insert/extract nodes.
// Defined because the index is an i32 rather than a pointer.
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
SDT_ZInsertVectorElt>;
def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDT_ZExtractVectorElt>;
// SystemZ-specific vector nodes.  The second SDNode argument names the type
// profile that constrains each node's result and operands.
def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
SDT_ZVecTernaryInt>;
def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
// Element shifts whose shift amount is a single i32 scalar rather than a
// vector.
def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
// Integer vector comparisons (equal, high, high logical); the result has
// the same vector type as the operands.
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
: SDNode<"SystemZISD::"##name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
@ -192,6 +259,10 @@ def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
// Vector-only views of the generic shift nodes: SDT_ZVecBinary requires the
// result, the shifted value and the shift amount to share one vector type.
def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
@ -215,11 +286,21 @@ def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
// Match an any-extension of $src followed by an in-register sign extension
// from its low 8 or 16 bits (sext8 / sext16 applied to the anyext result).
def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
// Register zero-extend operations.  Sub-32-bit values are represented as
// i32s, so the 8- and 16-bit forms are ANDs with a low-bit mask.
def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
// Match an any-extension of $src followed by an AND that keeps only the low
// 8 or 16 bits (zext8 / zext16 applied to the anyext result).
def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
// Typed floating-point loads.
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
@ -383,6 +464,10 @@ def z_iabs64 : PatFrag<(ops node:$src),
def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
// Integer multiply-and-add: ($src1 * $src2) + $src3, with the two
// multiplicands first and the addend last.
def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(add (mul node:$src1, node:$src2), node:$src3)>;
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@ -403,3 +488,88 @@ class loadu<SDPatternOperator operator, SDPatternOperator load = load>
class storeu<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$value, node:$addr),
(store (operator node:$value), node:$addr)>;
// Vector representation of all-zeros and all-ones.  Both are a v16i8
// z_byte_mask bitcast to whatever vector type the context needs.  The mask
// operand 65535 has its 16 low bits set.
// NOTE(review): presumably one mask bit per vector byte -- confirm against
// the BYTE_MASK lowering.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
// Load a scalar and replicate it in all elements of a vector.
// SCALARTYPE is the type of the loaded value and LOAD the load operator.
// The i8 and i16 forms use any-extending loads that produce an i32.
class z_replicate_load<ValueType scalartype, SDPatternOperator load>
: PatFrag<(ops node:$addr),
(z_replicate (scalartype (load node:$addr)))>;
def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
// Load a scalar and insert it into a single element of a vector.
// The fragment takes the vector, the address to load from and the element
// index, in that order.  SCALARTYPE is the type of the loaded value; the
// i8 and i16 forms use any-extending loads that produce an i32.
class z_vle<ValueType scalartype, SDPatternOperator load>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
(z_vector_insert node:$vec, (scalartype (load node:$addr)),
node:$index)>;
def z_vlei8 : z_vle<i32, anyextloadi8>;
def z_vlei16 : z_vle<i32, anyextloadi16>;
def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.  INDEX is the element number of that slot, which is the
// last element of the first doubleword: 7 for i8, 3 for i16 and 1 for i32.
class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
: PatFrag<(ops node:$addr),
(z_vector_insert (z_vzero),
(scalartype (load node:$addr)), (i32 index))>;
def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
// The i64 form needs no insertion: the loaded value is joined with a zero
// i64 as the second doubleword.
def z_vllezi64 : PatFrag<(ops node:$addr),
(z_join_dwords (i64 (load node:$addr)), (i64 0))>;
// Store one element of a vector.  The fragment takes the vector, the
// destination address and the element index, in that order.  SCALARTYPE is
// the type of the extracted element; the i8 and i16 forms extract an i32
// and use truncating stores.
class z_vste<ValueType scalartype, SDPatternOperator store>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
(store (scalartype (z_vector_extract node:$vec, node:$index)),
node:$addr)>;
def z_vstei8 : z_vste<i32, truncstorei8>;
def z_vstei16 : z_vste<i32, truncstorei16>;
def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
// Arithmetic negation on vectors, matched as (0 - x).
def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
// Bitwise negation on vectors, matched as (x ^ all-ones).
def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
// Signed "integer greater than zero" on vectors: x > 0.
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
// Signed "integer less than zero" on vectors, written as 0 > x by swapping
// the operands of the "greater than" comparison.
def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
// Integer absolute on vectors, matched in the branch-free form
// (x + s) ^ s, where s = x >> SHIFT (arithmetic) is all-ones for negative
// elements and zero otherwise.  SHIFT is the element width in bits minus
// one, as the four instantiations below show.
class z_viabs<int shift>
: PatFrag<(ops node:$src),
(xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
(z_vsra_by_scalar node:$src, (i32 shift)))>;
def z_viabs8 : z_viabs<7>;
def z_viabs16 : z_viabs<15>;
def z_viabs32 : z_viabs<31>;
def z_viabs64 : z_viabs<63>;
// Sign-extend the i64 elements of a vector from a narrower width.  Each
// element is shifted left by SHIFT and then arithmetically right by SHIFT,
// which sign-extends its low (64 - SHIFT) bits: 8, 16 or 32 bits for the
// shifts of 56, 48 and 32 used below.
class z_vse<int shift>
: PatFrag<(ops node:$src),
(z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>;
def z_vsei8 : z_vse<56>;
def z_vsei16 : z_vse<48>;
def z_vsei32 : z_vse<32>;
// ...and again with the extensions being done on individual i64 scalars.
// Elements INDEX1 and INDEX2 are extracted from the source vector, each is
// passed through OPERATOR, and the two results are joined into a vector
// with z_join_dwords.  The indices used below select the last i8, i16 or
// i32 element of the first and second doubleword respectively.
class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
: PatFrag<(ops node:$src),
(z_join_dwords
(operator (z_vector_extract node:$src, index1)),
(operator (z_vector_extract node:$src, index2)))>;
def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>;
def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;

View File

@ -21,15 +21,70 @@ extern "C" void LLVMInitializeSystemZTarget() {
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
}
// Decide whether the vector ABI is in effect for the given CPU name and
// subtarget feature string.
static bool UsesVectorABI(StringRef CPU, StringRef FS) {
  // The vector ABI is used whenever the vector facility is available.
  // An unnamed or generic CPU and the CPUs that predate z13 start without
  // it; every other CPU starts with it.
  const bool IsPreVectorCPU = CPU.empty() || CPU == "generic" ||
                              CPU == "z10" || CPU == "z196" ||
                              CPU == "zEC12";
  bool VectorABI = !IsPreVectorCPU;

  // "[+-]vector" elements of the feature string override the CPU default.
  // The list is scanned front to back, so the last matching element wins.
  SmallVector<StringRef, 3> Features;
  FS.split(Features, ",", -1, false /* KeepEmpty */);
  for (auto &Feature : Features) {
    if (Feature == "-vector")
      VectorABI = false;
    else if (Feature == "vector" || Feature == "+vector")
      VectorABI = true;
  }

  return VectorABI;
}
// Assemble the DataLayout string for the given triple, CPU and feature
// string.  The only part that varies is the 128-bit vector alignment,
// which depends on whether the vector ABI is in effect.
static std::string computeDataLayout(StringRef TT, StringRef CPU,
                                     StringRef FS) {
  const Triple TargetTriple(TT);
  const bool VectorABI = UsesVectorABI(CPU, FS);

  // Big endian.
  std::string Layout = "E";

  // Data mangling.
  Layout += DataLayout::getManglingComponent(TargetTriple);

  // Make sure that global data has at least 16 bits of alignment by
  // default, so that we can refer to it using LARL.  We don't have any
  // special requirements for stack variables though.
  Layout += "-i1:8:16-i8:8:16";

  // 64-bit integers are naturally aligned.
  Layout += "-i64:64";

  // 128-bit floats are aligned only to 64 bits.
  Layout += "-f128:64";

  // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
  if (VectorABI)
    Layout += "-v128:64";

  // We prefer 16 bits of alignment for all globals; see above.
  Layout += "-a:8:16";

  // Integer registers are 32 or 64 bits.
  Layout += "-n32:64";

  return Layout;
}
SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
// Make sure that global data has at least 16 bits of alignment by
// default, so that we can refer to it using LARL. We don't have any
// special requirements for stack variables though.
: LLVMTargetMachine(T, "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64",
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS),
TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {

View File

@ -238,3 +238,21 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
  if (Vector)
    // Vector registers only exist when the vector facility is present.
    return ST->hasVector() ? 32 : 0;

  // Discount the stack pointer.  Also leave out %r0, since it can't
  // be used in an address.
  return 14;
}
unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
  if (Vector)
    // Report a zero width when there is no vector facility.
    return ST->hasVector() ? 128 : 0;

  // Scalar registers are 64 bits wide.
  return 64;
}

View File

@ -63,6 +63,14 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
/// \name Vector TTI Implementations
/// @{
/// Return the number of registers available: 14 scalar registers when
/// \p Vector is false; otherwise 32 if the subtarget has the vector
/// facility and 0 if it does not.
unsigned getNumberOfRegisters(bool Vector);
/// Return the register width in bits: 64 when \p Vector is false; otherwise
/// 128 if the subtarget has the vector facility and 0 if it does not.
unsigned getRegisterBitWidth(bool Vector);
/// @}
};
} // end namespace llvm

View File

@ -0,0 +1,314 @@
; Test spilling of vector registers.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; We need to allocate a 16-byte spill slot and save the 8 call-saved FPRs.
; The frame size should be exactly 160 + 16 + 8 * 8 = 240.
define void @f1(<16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: aghi %r15, -240
; CHECK-DAG: std %f8,
; CHECK-DAG: std %f9,
; CHECK-DAG: std %f10,
; CHECK-DAG: std %f11,
; CHECK-DAG: std %f12,
; CHECK-DAG: std %f13,
; CHECK-DAG: std %f14,
; CHECK-DAG: std %f15,
; CHECK: vst {{%v[0-9]+}}, 160(%r15)
; CHECK: vl {{%v[0-9]+}}, 160(%r15)
; CHECK-DAG: ld %f8,
; CHECK-DAG: ld %f9,
; CHECK-DAG: ld %f10,
; CHECK-DAG: ld %f11,
; CHECK-DAG: ld %f12,
; CHECK-DAG: ld %f13,
; CHECK-DAG: ld %f14,
; CHECK-DAG: ld %f15,
; CHECK: aghi %r15, 240
; CHECK: br %r14
%v0 = load volatile <16 x i8>, <16 x i8> *%ptr
%v1 = load volatile <16 x i8>, <16 x i8> *%ptr
%v2 = load volatile <16 x i8>, <16 x i8> *%ptr
%v3 = load volatile <16 x i8>, <16 x i8> *%ptr
%v4 = load volatile <16 x i8>, <16 x i8> *%ptr
%v5 = load volatile <16 x i8>, <16 x i8> *%ptr
%v6 = load volatile <16 x i8>, <16 x i8> *%ptr
%v7 = load volatile <16 x i8>, <16 x i8> *%ptr
%v8 = load volatile <16 x i8>, <16 x i8> *%ptr
%v9 = load volatile <16 x i8>, <16 x i8> *%ptr
%v10 = load volatile <16 x i8>, <16 x i8> *%ptr
%v11 = load volatile <16 x i8>, <16 x i8> *%ptr
%v12 = load volatile <16 x i8>, <16 x i8> *%ptr
%v13 = load volatile <16 x i8>, <16 x i8> *%ptr
%v14 = load volatile <16 x i8>, <16 x i8> *%ptr
%v15 = load volatile <16 x i8>, <16 x i8> *%ptr
%v16 = load volatile <16 x i8>, <16 x i8> *%ptr
%v17 = load volatile <16 x i8>, <16 x i8> *%ptr
%v18 = load volatile <16 x i8>, <16 x i8> *%ptr
%v19 = load volatile <16 x i8>, <16 x i8> *%ptr
%v20 = load volatile <16 x i8>, <16 x i8> *%ptr
%v21 = load volatile <16 x i8>, <16 x i8> *%ptr
%v22 = load volatile <16 x i8>, <16 x i8> *%ptr
%v23 = load volatile <16 x i8>, <16 x i8> *%ptr
%v24 = load volatile <16 x i8>, <16 x i8> *%ptr
%v25 = load volatile <16 x i8>, <16 x i8> *%ptr
%v26 = load volatile <16 x i8>, <16 x i8> *%ptr
%v27 = load volatile <16 x i8>, <16 x i8> *%ptr
%v28 = load volatile <16 x i8>, <16 x i8> *%ptr
%v29 = load volatile <16 x i8>, <16 x i8> *%ptr
%v30 = load volatile <16 x i8>, <16 x i8> *%ptr
%v31 = load volatile <16 x i8>, <16 x i8> *%ptr
%vx = load volatile <16 x i8>, <16 x i8> *%ptr
store volatile <16 x i8> %vx, <16 x i8> *%ptr
store volatile <16 x i8> %v31, <16 x i8> *%ptr
store volatile <16 x i8> %v30, <16 x i8> *%ptr
store volatile <16 x i8> %v29, <16 x i8> *%ptr
store volatile <16 x i8> %v28, <16 x i8> *%ptr
store volatile <16 x i8> %v27, <16 x i8> *%ptr
store volatile <16 x i8> %v26, <16 x i8> *%ptr
store volatile <16 x i8> %v25, <16 x i8> *%ptr
store volatile <16 x i8> %v24, <16 x i8> *%ptr
store volatile <16 x i8> %v23, <16 x i8> *%ptr
store volatile <16 x i8> %v22, <16 x i8> *%ptr
store volatile <16 x i8> %v21, <16 x i8> *%ptr
store volatile <16 x i8> %v20, <16 x i8> *%ptr
store volatile <16 x i8> %v19, <16 x i8> *%ptr
store volatile <16 x i8> %v18, <16 x i8> *%ptr
store volatile <16 x i8> %v17, <16 x i8> *%ptr
store volatile <16 x i8> %v16, <16 x i8> *%ptr
store volatile <16 x i8> %v15, <16 x i8> *%ptr
store volatile <16 x i8> %v14, <16 x i8> *%ptr
store volatile <16 x i8> %v13, <16 x i8> *%ptr
store volatile <16 x i8> %v12, <16 x i8> *%ptr
store volatile <16 x i8> %v11, <16 x i8> *%ptr
store volatile <16 x i8> %v10, <16 x i8> *%ptr
store volatile <16 x i8> %v9, <16 x i8> *%ptr
store volatile <16 x i8> %v8, <16 x i8> *%ptr
store volatile <16 x i8> %v7, <16 x i8> *%ptr
store volatile <16 x i8> %v6, <16 x i8> *%ptr
store volatile <16 x i8> %v5, <16 x i8> *%ptr
store volatile <16 x i8> %v4, <16 x i8> *%ptr
store volatile <16 x i8> %v3, <16 x i8> *%ptr
store volatile <16 x i8> %v2, <16 x i8> *%ptr
store volatile <16 x i8> %v1, <16 x i8> *%ptr
store volatile <16 x i8> %v0, <16 x i8> *%ptr
ret void
}
; Like f1, but no 16-byte slot should be needed.
define void @f2(<16 x i8> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: aghi %r15, -224
; CHECK-DAG: std %f8,
; CHECK-DAG: std %f9,
; CHECK-DAG: std %f10,
; CHECK-DAG: std %f11,
; CHECK-DAG: std %f12,
; CHECK-DAG: std %f13,
; CHECK-DAG: std %f14,
; CHECK-DAG: std %f15,
; CHECK-NOT: vst {{.*}}(%r15)
; CHECK-NOT: vl {{.*}}(%r15)
; CHECK-DAG: ld %f8,
; CHECK-DAG: ld %f9,
; CHECK-DAG: ld %f10,
; CHECK-DAG: ld %f11,
; CHECK-DAG: ld %f12,
; CHECK-DAG: ld %f13,
; CHECK-DAG: ld %f14,
; CHECK-DAG: ld %f15,
; CHECK: aghi %r15, 224
; CHECK: br %r14
%v0 = load volatile <16 x i8>, <16 x i8> *%ptr
%v1 = load volatile <16 x i8>, <16 x i8> *%ptr
%v2 = load volatile <16 x i8>, <16 x i8> *%ptr
%v3 = load volatile <16 x i8>, <16 x i8> *%ptr
%v4 = load volatile <16 x i8>, <16 x i8> *%ptr
%v5 = load volatile <16 x i8>, <16 x i8> *%ptr
%v6 = load volatile <16 x i8>, <16 x i8> *%ptr
%v7 = load volatile <16 x i8>, <16 x i8> *%ptr
%v8 = load volatile <16 x i8>, <16 x i8> *%ptr
%v9 = load volatile <16 x i8>, <16 x i8> *%ptr
%v10 = load volatile <16 x i8>, <16 x i8> *%ptr
%v11 = load volatile <16 x i8>, <16 x i8> *%ptr
%v12 = load volatile <16 x i8>, <16 x i8> *%ptr
%v13 = load volatile <16 x i8>, <16 x i8> *%ptr
%v14 = load volatile <16 x i8>, <16 x i8> *%ptr
%v15 = load volatile <16 x i8>, <16 x i8> *%ptr
%v16 = load volatile <16 x i8>, <16 x i8> *%ptr
%v17 = load volatile <16 x i8>, <16 x i8> *%ptr
%v18 = load volatile <16 x i8>, <16 x i8> *%ptr
%v19 = load volatile <16 x i8>, <16 x i8> *%ptr
%v20 = load volatile <16 x i8>, <16 x i8> *%ptr
%v21 = load volatile <16 x i8>, <16 x i8> *%ptr
%v22 = load volatile <16 x i8>, <16 x i8> *%ptr
%v23 = load volatile <16 x i8>, <16 x i8> *%ptr
%v24 = load volatile <16 x i8>, <16 x i8> *%ptr
%v25 = load volatile <16 x i8>, <16 x i8> *%ptr
%v26 = load volatile <16 x i8>, <16 x i8> *%ptr
%v27 = load volatile <16 x i8>, <16 x i8> *%ptr
%v28 = load volatile <16 x i8>, <16 x i8> *%ptr
%v29 = load volatile <16 x i8>, <16 x i8> *%ptr
%v30 = load volatile <16 x i8>, <16 x i8> *%ptr
%v31 = load volatile <16 x i8>, <16 x i8> *%ptr
store volatile <16 x i8> %v31, <16 x i8> *%ptr
store volatile <16 x i8> %v30, <16 x i8> *%ptr
store volatile <16 x i8> %v29, <16 x i8> *%ptr
store volatile <16 x i8> %v28, <16 x i8> *%ptr
store volatile <16 x i8> %v27, <16 x i8> *%ptr
store volatile <16 x i8> %v26, <16 x i8> *%ptr
store volatile <16 x i8> %v25, <16 x i8> *%ptr
store volatile <16 x i8> %v24, <16 x i8> *%ptr
store volatile <16 x i8> %v23, <16 x i8> *%ptr
store volatile <16 x i8> %v22, <16 x i8> *%ptr
store volatile <16 x i8> %v21, <16 x i8> *%ptr
store volatile <16 x i8> %v20, <16 x i8> *%ptr
store volatile <16 x i8> %v19, <16 x i8> *%ptr
store volatile <16 x i8> %v18, <16 x i8> *%ptr
store volatile <16 x i8> %v17, <16 x i8> *%ptr
store volatile <16 x i8> %v16, <16 x i8> *%ptr
store volatile <16 x i8> %v15, <16 x i8> *%ptr
store volatile <16 x i8> %v14, <16 x i8> *%ptr
store volatile <16 x i8> %v13, <16 x i8> *%ptr
store volatile <16 x i8> %v12, <16 x i8> *%ptr
store volatile <16 x i8> %v11, <16 x i8> *%ptr
store volatile <16 x i8> %v10, <16 x i8> *%ptr
store volatile <16 x i8> %v9, <16 x i8> *%ptr
store volatile <16 x i8> %v8, <16 x i8> *%ptr
store volatile <16 x i8> %v7, <16 x i8> *%ptr
store volatile <16 x i8> %v6, <16 x i8> *%ptr
store volatile <16 x i8> %v5, <16 x i8> *%ptr
store volatile <16 x i8> %v4, <16 x i8> *%ptr
store volatile <16 x i8> %v3, <16 x i8> *%ptr
store volatile <16 x i8> %v2, <16 x i8> *%ptr
store volatile <16 x i8> %v1, <16 x i8> *%ptr
store volatile <16 x i8> %v0, <16 x i8> *%ptr
ret void
}
; Like f2, but only %f8 should be saved.
define void @f3(<16 x i8> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: aghi %r15, -168
; CHECK-DAG: std %f8,
; CHECK-NOT: vst {{.*}}(%r15)
; CHECK-NOT: vl {{.*}}(%r15)
; CHECK-NOT: %v9
; CHECK-NOT: %v10
; CHECK-NOT: %v11
; CHECK-NOT: %v12
; CHECK-NOT: %v13
; CHECK-NOT: %v14
; CHECK-NOT: %v15
; CHECK-DAG: ld %f8,
; CHECK: aghi %r15, 168
; CHECK: br %r14
%v0 = load volatile <16 x i8>, <16 x i8> *%ptr
%v1 = load volatile <16 x i8>, <16 x i8> *%ptr
%v2 = load volatile <16 x i8>, <16 x i8> *%ptr
%v3 = load volatile <16 x i8>, <16 x i8> *%ptr
%v4 = load volatile <16 x i8>, <16 x i8> *%ptr
%v5 = load volatile <16 x i8>, <16 x i8> *%ptr
%v6 = load volatile <16 x i8>, <16 x i8> *%ptr
%v7 = load volatile <16 x i8>, <16 x i8> *%ptr
%v8 = load volatile <16 x i8>, <16 x i8> *%ptr
%v16 = load volatile <16 x i8>, <16 x i8> *%ptr
%v17 = load volatile <16 x i8>, <16 x i8> *%ptr
%v18 = load volatile <16 x i8>, <16 x i8> *%ptr
%v19 = load volatile <16 x i8>, <16 x i8> *%ptr
%v20 = load volatile <16 x i8>, <16 x i8> *%ptr
%v21 = load volatile <16 x i8>, <16 x i8> *%ptr
%v22 = load volatile <16 x i8>, <16 x i8> *%ptr
%v23 = load volatile <16 x i8>, <16 x i8> *%ptr
%v24 = load volatile <16 x i8>, <16 x i8> *%ptr
%v25 = load volatile <16 x i8>, <16 x i8> *%ptr
%v26 = load volatile <16 x i8>, <16 x i8> *%ptr
%v27 = load volatile <16 x i8>, <16 x i8> *%ptr
%v28 = load volatile <16 x i8>, <16 x i8> *%ptr
%v29 = load volatile <16 x i8>, <16 x i8> *%ptr
%v30 = load volatile <16 x i8>, <16 x i8> *%ptr
%v31 = load volatile <16 x i8>, <16 x i8> *%ptr
store volatile <16 x i8> %v31, <16 x i8> *%ptr
store volatile <16 x i8> %v30, <16 x i8> *%ptr
store volatile <16 x i8> %v29, <16 x i8> *%ptr
store volatile <16 x i8> %v28, <16 x i8> *%ptr
store volatile <16 x i8> %v27, <16 x i8> *%ptr
store volatile <16 x i8> %v26, <16 x i8> *%ptr
store volatile <16 x i8> %v25, <16 x i8> *%ptr
store volatile <16 x i8> %v24, <16 x i8> *%ptr
store volatile <16 x i8> %v23, <16 x i8> *%ptr
store volatile <16 x i8> %v22, <16 x i8> *%ptr
store volatile <16 x i8> %v21, <16 x i8> *%ptr
store volatile <16 x i8> %v20, <16 x i8> *%ptr
store volatile <16 x i8> %v19, <16 x i8> *%ptr
store volatile <16 x i8> %v18, <16 x i8> *%ptr
store volatile <16 x i8> %v17, <16 x i8> *%ptr
store volatile <16 x i8> %v16, <16 x i8> *%ptr
store volatile <16 x i8> %v8, <16 x i8> *%ptr
store volatile <16 x i8> %v7, <16 x i8> *%ptr
store volatile <16 x i8> %v6, <16 x i8> *%ptr
store volatile <16 x i8> %v5, <16 x i8> *%ptr
store volatile <16 x i8> %v4, <16 x i8> *%ptr
store volatile <16 x i8> %v3, <16 x i8> *%ptr
store volatile <16 x i8> %v2, <16 x i8> *%ptr
store volatile <16 x i8> %v1, <16 x i8> *%ptr
store volatile <16 x i8> %v0, <16 x i8> *%ptr
ret void
}
; Like f2, but no registers should be saved.
define void @f4(<16 x i8> *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r15
; CHECK: br %r14
%v0 = load volatile <16 x i8>, <16 x i8> *%ptr
%v1 = load volatile <16 x i8>, <16 x i8> *%ptr
%v2 = load volatile <16 x i8>, <16 x i8> *%ptr
%v3 = load volatile <16 x i8>, <16 x i8> *%ptr
%v4 = load volatile <16 x i8>, <16 x i8> *%ptr
%v5 = load volatile <16 x i8>, <16 x i8> *%ptr
%v6 = load volatile <16 x i8>, <16 x i8> *%ptr
%v7 = load volatile <16 x i8>, <16 x i8> *%ptr
%v16 = load volatile <16 x i8>, <16 x i8> *%ptr
%v17 = load volatile <16 x i8>, <16 x i8> *%ptr
%v18 = load volatile <16 x i8>, <16 x i8> *%ptr
%v19 = load volatile <16 x i8>, <16 x i8> *%ptr
%v20 = load volatile <16 x i8>, <16 x i8> *%ptr
%v21 = load volatile <16 x i8>, <16 x i8> *%ptr
%v22 = load volatile <16 x i8>, <16 x i8> *%ptr
%v23 = load volatile <16 x i8>, <16 x i8> *%ptr
%v24 = load volatile <16 x i8>, <16 x i8> *%ptr
%v25 = load volatile <16 x i8>, <16 x i8> *%ptr
%v26 = load volatile <16 x i8>, <16 x i8> *%ptr
%v27 = load volatile <16 x i8>, <16 x i8> *%ptr
%v28 = load volatile <16 x i8>, <16 x i8> *%ptr
%v29 = load volatile <16 x i8>, <16 x i8> *%ptr
%v30 = load volatile <16 x i8>, <16 x i8> *%ptr
%v31 = load volatile <16 x i8>, <16 x i8> *%ptr
store volatile <16 x i8> %v31, <16 x i8> *%ptr
store volatile <16 x i8> %v30, <16 x i8> *%ptr
store volatile <16 x i8> %v29, <16 x i8> *%ptr
store volatile <16 x i8> %v28, <16 x i8> *%ptr
store volatile <16 x i8> %v27, <16 x i8> *%ptr
store volatile <16 x i8> %v26, <16 x i8> *%ptr
store volatile <16 x i8> %v25, <16 x i8> *%ptr
store volatile <16 x i8> %v24, <16 x i8> *%ptr
store volatile <16 x i8> %v23, <16 x i8> *%ptr
store volatile <16 x i8> %v22, <16 x i8> *%ptr
store volatile <16 x i8> %v21, <16 x i8> *%ptr
store volatile <16 x i8> %v20, <16 x i8> *%ptr
store volatile <16 x i8> %v19, <16 x i8> *%ptr
store volatile <16 x i8> %v18, <16 x i8> *%ptr
store volatile <16 x i8> %v17, <16 x i8> *%ptr
store volatile <16 x i8> %v16, <16 x i8> *%ptr
store volatile <16 x i8> %v7, <16 x i8> *%ptr
store volatile <16 x i8> %v6, <16 x i8> *%ptr
store volatile <16 x i8> %v5, <16 x i8> *%ptr
store volatile <16 x i8> %v4, <16 x i8> *%ptr
store volatile <16 x i8> %v3, <16 x i8> *%ptr
store volatile <16 x i8> %v2, <16 x i8> *%ptr
store volatile <16 x i8> %v1, <16 x i8> *%ptr
store volatile <16 x i8> %v0, <16 x i8> *%ptr
ret void
}

View File

@ -0,0 +1,49 @@
; Verify that we use the vector ABI datalayout if and only if
; the vector facility is present.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; One byte followed by a 128-bit vector, so the offset of the vector field
; reveals which vector alignment the datalayout in effect uses.
%struct.S = type { i8, <2 x i64> }
; Add 1 to both elements of the vector field of %s, in place.
; Runs with the vector facility expect a vector load from offset 8, i.e. the
; vector field is only 64-bit aligned.  Runs without it expect two agsi
; updates at offsets 16 and 24, i.e. the vector field is naturally aligned
; to 16 bytes.
define void @test(%struct.S* %s) nounwind {
; CHECK-VECTOR-LABEL: @test
; CHECK-VECTOR: vl %v0, 8(%r2)
; CHECK-NOVECTOR-LABEL: @test
; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
; Address of field 1 (the vector) within the struct.
%ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
%vec = load <2 x i64>, <2 x i64>* %ptr
%add = add <2 x i64> %vec, <i64 1, i64 1>
store <2 x i64> %add, <2 x i64>* %ptr
ret void
}

View File

@ -0,0 +1,146 @@
; Test v16i8 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt.
define <16 x i8> @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vlpb %v24, %v24
; CHECK: br %r14
%cmp = icmp slt <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
ret <16 x i8> %ret
}
; Test with sle.
define <16 x i8> @f2(<16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vlpb %v24, %v24
; CHECK: br %r14
%cmp = icmp sle <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
ret <16 x i8> %ret
}
; Test with sgt.
define <16 x i8> @f3(<16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: vlpb %v24, %v24
; CHECK: br %r14
%cmp = icmp sgt <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
ret <16 x i8> %ret
}
; Test with sge.
define <16 x i8> @f4(<16 x i8> %val) {
; CHECK-LABEL: f4:
; CHECK: vlpb %v24, %v24
; CHECK: br %r14
%cmp = icmp sge <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
ret <16 x i8> %ret
}
; Test that negative absolute uses VLPB too. There is no vector equivalent
; of LOAD NEGATIVE.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
%ret = sub <16 x i8> zeroinitializer, %abs
ret <16 x i8> %ret
}
; Try another form of negative absolute (slt version).
define <16 x i8> @f6(<16 x i8> %val) {
; CHECK-LABEL: f6:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
ret <16 x i8> %ret
}
; Test negative absolute with sle.
define <16 x i8> @f7(<16 x i8> %val) {
; CHECK-LABEL: f7:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sle <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
ret <16 x i8> %ret
}
; Test negative absolute with sgt.
define <16 x i8> @f8(<16 x i8> %val) {
; CHECK-LABEL: f8:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sgt <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
ret <16 x i8> %ret
}
; Test negative absolute with sge.
define <16 x i8> @f9(<16 x i8> %val) {
; CHECK-LABEL: f9:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sge <16 x i8> %val, zeroinitializer
%neg = sub <16 x i8> zeroinitializer, %val
%ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
ret <16 x i8> %ret
}
; Test with an SRA-based boolean vector.
define <16 x i8> @f10(<16 x i8> %val) {
; CHECK-LABEL: f10:
; CHECK: vlpb %v24, %v24
; CHECK: br %r14
%shr = ashr <16 x i8> %val,
<i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%neg = sub <16 x i8> zeroinitializer, %val
%and1 = and <16 x i8> %shr, %neg
%not = xor <16 x i8> %shr,
<i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%and2 = and <16 x i8> %not, %val
%ret = or <16 x i8> %and1, %and2
ret <16 x i8> %ret
}
; ...and again in reverse, which gives negative absolute.
define <16 x i8> @f11(<16 x i8> %val) {
; CHECK-LABEL: f11:
; CHECK: vlpb [[REG:%v[0-9]+]], %v24
; CHECK: vlcb %v24, [[REG]]
; CHECK: br %r14
%shr = ashr <16 x i8> %val,
<i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%and1 = and <16 x i8> %shr, %val
%not = xor <16 x i8> %shr,
<i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%neg = sub <16 x i8> zeroinitializer, %val
%and2 = and <16 x i8> %not, %neg
%ret = or <16 x i8> %and1, %and2
ret <16 x i8> %ret
}

View File

@ -0,0 +1,142 @@
; Test v8i16 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt.
define <8 x i16> @f1(<8 x i16> %val) {
; CHECK-LABEL: f1:
; CHECK: vlph %v24, %v24
; CHECK: br %r14
%cmp = icmp slt <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
ret <8 x i16> %ret
}
; Test with sle.
define <8 x i16> @f2(<8 x i16> %val) {
; CHECK-LABEL: f2:
; CHECK: vlph %v24, %v24
; CHECK: br %r14
%cmp = icmp sle <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
ret <8 x i16> %ret
}
; Test with sgt.
define <8 x i16> @f3(<8 x i16> %val) {
; CHECK-LABEL: f3:
; CHECK: vlph %v24, %v24
; CHECK: br %r14
%cmp = icmp sgt <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
ret <8 x i16> %ret
}
; Test with sge.
define <8 x i16> @f4(<8 x i16> %val) {
; CHECK-LABEL: f4:
; CHECK: vlph %v24, %v24
; CHECK: br %r14
%cmp = icmp sge <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
ret <8 x i16> %ret
}
; Test that negative absolute uses VLPH too. There is no vector equivalent
; of LOAD NEGATIVE.
define <8 x i16> @f5(<8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%abs = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
%ret = sub <8 x i16> zeroinitializer, %abs
ret <8 x i16> %ret
}
; Try another form of negative absolute (slt version).
define <8 x i16> @f6(<8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
ret <8 x i16> %ret
}
; Test negative absolute with sle.
define <8 x i16> @f7(<8 x i16> %val) {
; CHECK-LABEL: f7:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sle <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
ret <8 x i16> %ret
}
; Test negative absolute with sgt.
define <8 x i16> @f8(<8 x i16> %val) {
; CHECK-LABEL: f8:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sgt <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
ret <8 x i16> %ret
}
; Test negative absolute with sge.
define <8 x i16> @f9(<8 x i16> %val) {
; CHECK-LABEL: f9:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sge <8 x i16> %val, zeroinitializer
%neg = sub <8 x i16> zeroinitializer, %val
%ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
ret <8 x i16> %ret
}
; Test with an SRA-based boolean vector.
define <8 x i16> @f10(<8 x i16> %val) {
; CHECK-LABEL: f10:
; CHECK: vlph %v24, %v24
; CHECK: br %r14
%shr = ashr <8 x i16> %val,
<i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%neg = sub <8 x i16> zeroinitializer, %val
%and1 = and <8 x i16> %shr, %neg
%not = xor <8 x i16> %shr,
<i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%and2 = and <8 x i16> %not, %val
%ret = or <8 x i16> %and1, %and2
ret <8 x i16> %ret
}
; ...and again in reverse, which gives negative absolute.
define <8 x i16> @f11(<8 x i16> %val) {
; CHECK-LABEL: f11:
; CHECK: vlph [[REG:%v[0-9]+]], %v24
; CHECK: vlch %v24, [[REG]]
; CHECK: br %r14
%shr = ashr <8 x i16> %val,
<i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and1 = and <8 x i16> %shr, %val
%not = xor <8 x i16> %shr,
<i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%neg = sub <8 x i16> zeroinitializer, %val
%and2 = and <8 x i16> %not, %neg
%ret = or <8 x i16> %and1, %and2
ret <8 x i16> %ret
}

View File

@ -0,0 +1,138 @@
; Test v4i32 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt.
define <4 x i32> @f1(<4 x i32> %val) {
; CHECK-LABEL: f1:
; CHECK: vlpf %v24, %v24
; CHECK: br %r14
%cmp = icmp slt <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
ret <4 x i32> %ret
}
; Test with sle.
define <4 x i32> @f2(<4 x i32> %val) {
; CHECK-LABEL: f2:
; CHECK: vlpf %v24, %v24
; CHECK: br %r14
%cmp = icmp sle <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
ret <4 x i32> %ret
}
; Test with sgt.
define <4 x i32> @f3(<4 x i32> %val) {
; CHECK-LABEL: f3:
; CHECK: vlpf %v24, %v24
; CHECK: br %r14
%cmp = icmp sgt <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
ret <4 x i32> %ret
}
; Test with sge.
define <4 x i32> @f4(<4 x i32> %val) {
; CHECK-LABEL: f4:
; CHECK: vlpf %v24, %v24
; CHECK: br %r14
%cmp = icmp sge <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
ret <4 x i32> %ret
}
; Test that negative absolute uses VLPF too. There is no vector equivalent
; of LOAD NEGATIVE.
define <4 x i32> @f5(<4 x i32> %val) {
; CHECK-LABEL: f5:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%abs = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
%ret = sub <4 x i32> zeroinitializer, %abs
ret <4 x i32> %ret
}
; Try another form of negative absolute (slt version).
define <4 x i32> @f6(<4 x i32> %val) {
; CHECK-LABEL: f6:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
ret <4 x i32> %ret
}
; Test negative absolute with sle.
define <4 x i32> @f7(<4 x i32> %val) {
; CHECK-LABEL: f7:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sle <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
ret <4 x i32> %ret
}
; Test negative absolute with sgt.
define <4 x i32> @f8(<4 x i32> %val) {
; CHECK-LABEL: f8:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sgt <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
ret <4 x i32> %ret
}
; Test negative absolute with sge.
define <4 x i32> @f9(<4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sge <4 x i32> %val, zeroinitializer
%neg = sub <4 x i32> zeroinitializer, %val
%ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
ret <4 x i32> %ret
}
; Test with an SRA-based boolean vector.
define <4 x i32> @f10(<4 x i32> %val) {
; CHECK-LABEL: f10:
; CHECK: vlpf %v24, %v24
; CHECK: br %r14
%shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
%neg = sub <4 x i32> zeroinitializer, %val
%and1 = and <4 x i32> %shr, %neg
%not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %not, %val
%ret = or <4 x i32> %and1, %and2
ret <4 x i32> %ret
}
; ...and again in reverse, which gives negative absolute.
define <4 x i32> @f11(<4 x i32> %val) {
; CHECK-LABEL: f11:
; CHECK: vlpf [[REG:%v[0-9]+]], %v24
; CHECK: vlcf %v24, [[REG]]
; CHECK: br %r14
%shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
%and1 = and <4 x i32> %shr, %val
%not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg = sub <4 x i32> zeroinitializer, %val
%and2 = and <4 x i32> %not, %neg
%ret = or <4 x i32> %and1, %and2
ret <4 x i32> %ret
}

View File

@ -0,0 +1,138 @@
; Test v2i64 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt.
define <2 x i64> @f1(<2 x i64> %val) {
; CHECK-LABEL: f1:
; CHECK: vlpg %v24, %v24
; CHECK: br %r14
%cmp = icmp slt <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
ret <2 x i64> %ret
}
; Test with sle.
define <2 x i64> @f2(<2 x i64> %val) {
; CHECK-LABEL: f2:
; CHECK: vlpg %v24, %v24
; CHECK: br %r14
%cmp = icmp sle <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
ret <2 x i64> %ret
}
; Test with sgt.
define <2 x i64> @f3(<2 x i64> %val) {
; CHECK-LABEL: f3:
; CHECK: vlpg %v24, %v24
; CHECK: br %r14
%cmp = icmp sgt <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
ret <2 x i64> %ret
}
; Test with sge.
define <2 x i64> @f4(<2 x i64> %val) {
; CHECK-LABEL: f4:
; CHECK: vlpg %v24, %v24
; CHECK: br %r14
%cmp = icmp sge <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
ret <2 x i64> %ret
}
; Test that negative absolute uses VLPG too. There is no vector equivalent
; of LOAD NEGATIVE.
define <2 x i64> @f5(<2 x i64> %val) {
; CHECK-LABEL: f5:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%abs = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
%ret = sub <2 x i64> zeroinitializer, %abs
ret <2 x i64> %ret
}
; Try another form of negative absolute (slt version).
define <2 x i64> @f6(<2 x i64> %val) {
; CHECK-LABEL: f6:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp slt <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
ret <2 x i64> %ret
}
; Test negative absolute with sle.
define <2 x i64> @f7(<2 x i64> %val) {
; CHECK-LABEL: f7:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sle <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
ret <2 x i64> %ret
}
; Test negative absolute with sgt.
define <2 x i64> @f8(<2 x i64> %val) {
; CHECK-LABEL: f8:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sgt <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
ret <2 x i64> %ret
}
; Test negative absolute with sge.
define <2 x i64> @f9(<2 x i64> %val) {
; CHECK-LABEL: f9:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%cmp = icmp sge <2 x i64> %val, zeroinitializer
%neg = sub <2 x i64> zeroinitializer, %val
%ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
ret <2 x i64> %ret
}
; Test with an SRA-based boolean vector.
define <2 x i64> @f10(<2 x i64> %val) {
; CHECK-LABEL: f10:
; CHECK: vlpg %v24, %v24
; CHECK: br %r14
%shr = ashr <2 x i64> %val, <i64 63, i64 63>
%neg = sub <2 x i64> zeroinitializer, %val
%and1 = and <2 x i64> %shr, %neg
%not = xor <2 x i64> %shr, <i64 -1, i64 -1>
%and2 = and <2 x i64> %not, %val
%ret = or <2 x i64> %and1, %and2
ret <2 x i64> %ret
}
; ...and again in reverse, which gives negative absolute.
define <2 x i64> @f11(<2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: vlpg [[REG:%v[0-9]+]], %v24
; CHECK: vlcg %v24, [[REG]]
; CHECK: br %r14
%shr = ashr <2 x i64> %val, <i64 63, i64 63>
%and1 = and <2 x i64> %shr, %val
%not = xor <2 x i64> %shr, <i64 -1, i64 -1>
%neg = sub <2 x i64> zeroinitializer, %val
%and2 = and <2 x i64> %not, %neg
%ret = or <2 x i64> %and1, %and2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 addition.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vab %v24, %v26, %v28
; CHECK: br %r14
%ret = add <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 addition.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vah %v24, %v26, %v28
; CHECK: br %r14
%ret = add <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 addition.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vaf %v24, %v26, %v28
; CHECK: br %r14
%ret = add <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 addition.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vag %v24, %v26, %v28
; CHECK: br %r14
%ret = add <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector AND.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 AND.  A bitwise AND does not depend on the element size, so
; the expected instruction is a plain vn with no element-size suffix.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vn %v24, %v26, %v28
; CHECK: br %r14
%ret = and <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 AND, expected as the same element-size-independent vn.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vn %v24, %v26, %v28
; CHECK: br %r14
%ret = and <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 AND, expected as the same element-size-independent vn.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vn %v24, %v26, %v28
; CHECK: br %r14
%ret = and <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 AND, expected as the same element-size-independent vn.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vn %v24, %v26, %v28
; CHECK: br %r14
%ret = and <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,91 @@
; Test vector AND-NOT.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 AND-NOT: val1 & ~val2, with the NOT written as an XOR with
; all-ones.  Expected as a single vnc whose last operand is the inverted
; value.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vnc %v24, %v26, %v28
; CHECK: br %r14
%not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1>
%ret = and <16 x i8> %val1, %not
ret <16 x i8> %ret
}
; ...and again with the reverse: val1 is the inverted value and appears as
; the first operand of the AND, so the vnc operands are expected swapped.
define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vnc %v24, %v28, %v26
; CHECK: br %r14
%not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1>
%ret = and <16 x i8> %not, %val2
ret <16 x i8> %ret
}
; Test a v8i16 AND-NOT: val1 & ~val2, expected as a single vnc whose last
; operand is the inverted value.
define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f3:
; CHECK: vnc %v24, %v26, %v28
; CHECK: br %r14
%not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1>
%ret = and <8 x i16> %val1, %not
ret <8 x i16> %ret
}
; ...and again with the reverse: ~val1 & val2, so the vnc operands are
; expected swapped.
define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f4:
; CHECK: vnc %v24, %v28, %v26
; CHECK: br %r14
%not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1>
%ret = and <8 x i16> %not, %val2
ret <8 x i16> %ret
}
; Test a v4i32 AND-NOT: val1 & ~val2, expected as a single vnc whose last
; operand is the inverted value.
define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f5:
; CHECK: vnc %v24, %v26, %v28
; CHECK: br %r14
%not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
%ret = and <4 x i32> %val1, %not
ret <4 x i32> %ret
}
; ...and again with the reverse: ~val1 & val2, so the vnc operands are
; expected swapped.
define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f6:
; CHECK: vnc %v24, %v28, %v26
; CHECK: br %r14
%not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
%ret = and <4 x i32> %not, %val2
ret <4 x i32> %ret
}
; Test a v2i64 AND-NOT: val1 & ~val2, expected as a single vnc whose last
; operand is the inverted value.
define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f7:
; CHECK: vnc %v24, %v26, %v28
; CHECK: br %r14
%not = xor <2 x i64> %val2, <i64 -1, i64 -1>
%ret = and <2 x i64> %val1, %not
ret <2 x i64> %ret
}
; ...and again with the reverse: ~val1 & val2, so the vnc operands are
; expected swapped.
define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f8:
; CHECK: vnc %v24, %v28, %v26
; CHECK: br %r14
%not = xor <2 x i64> %val1, <i64 -1, i64 -1>
%ret = and <2 x i64> %not, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,113 @@
; Test vector zero extensions, which need to be implemented as ANDs.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i1->v16i8 extension.  Truncating to i1 and zero-extending back
; keeps only the low bit of each byte, so the expected code is an AND with
; a vector of replicated 1s.
define <16 x i8> @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vrepib [[REG:%v[0-9]+]], 1
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <16 x i8> %val to <16 x i1>
%ret = zext <16 x i1> %trunc to <16 x i8>
ret <16 x i8> %ret
}
; Test a v8i1->v8i16 extension: only the low bit of each halfword is kept,
; so the expected code is an AND with a vector of replicated 1s.
define <8 x i16> @f2(<8 x i16> %val) {
; CHECK-LABEL: f2:
; CHECK: vrepih [[REG:%v[0-9]+]], 1
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <8 x i16> %val to <8 x i1>
%ret = zext <8 x i1> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; Test a v8i8->v8i16 extension: only the low byte of each halfword is
; kept.  The expected vgbm immediate 21845 is 0x5555, an alternating bit
; pattern (presumably one mask bit per byte of the register).
define <8 x i16> @f3(<8 x i16> %val) {
; CHECK-LABEL: f3:
; CHECK: vgbm [[REG:%v[0-9]+]], 21845
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <8 x i16> %val to <8 x i8>
%ret = zext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; Test a v4i1->v4i32 extension: only the low bit of each word is kept, so
; the expected code is an AND with a vector of replicated 1s.
define <4 x i32> @f4(<4 x i32> %val) {
; CHECK-LABEL: f4:
; CHECK: vrepif [[REG:%v[0-9]+]], 1
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i1>
%ret = zext <4 x i1> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v4i8->v4i32 extension: only the low byte of each word is kept.
; The expected vgbm immediate 4369 is 0x1111, i.e. one set bit in every
; group of four (presumably one mask bit per byte of the register).
define <4 x i32> @f5(<4 x i32> %val) {
; CHECK-LABEL: f5:
; CHECK: vgbm [[REG:%v[0-9]+]], 4369
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i8>
%ret = zext <4 x i8> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v4i16->v4i32 extension: only the low halfword of each word is
; kept.  The expected vgbm immediate 13107 is 0x3333, i.e. two set bits in
; every group of four (presumably one mask bit per byte of the register).
define <4 x i32> @f6(<4 x i32> %val) {
; CHECK-LABEL: f6:
; CHECK: vgbm [[REG:%v[0-9]+]], 13107
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i16>
%ret = zext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v2i1->v2i64 extension: only the low bit of each doubleword is
; kept, so the expected code is an AND with a vector of replicated 1s.
define <2 x i64> @f7(<2 x i64> %val) {
; CHECK-LABEL: f7:
; CHECK: vrepig [[REG:%v[0-9]+]], 1
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i1>
%ret = zext <2 x i1> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i8->v2i64 extension: only the low byte of each doubleword is
; kept.  The expected vgbm immediate 257 is 0x0101, i.e. one set bit in
; every group of eight (presumably one mask bit per byte of the register).
define <2 x i64> @f8(<2 x i64> %val) {
; CHECK-LABEL: f8:
; CHECK: vgbm [[REG:%v[0-9]+]], 257
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i8>
%ret = zext <2 x i8> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i16->v2i64 extension: only the low halfword of each doubleword
; is kept.  The expected vgbm immediate 771 is 0x0303, i.e. two set bits in
; every group of eight (presumably one mask bit per byte of the register).
define <2 x i64> @f9(<2 x i64> %val) {
; CHECK-LABEL: f9:
; CHECK: vgbm [[REG:%v[0-9]+]], 771
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i16>
%ret = zext <2 x i16> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i32->v2i64 extension: only the low word of each doubleword is
; kept.  The expected vgbm immediate 3855 is 0x0f0f, i.e. four set bits in
; every group of eight (presumably one mask bit per byte of the register).
define <2 x i64> @f10(<2 x i64> %val) {
; CHECK-LABEL: f10:
; CHECK: vgbm [[REG:%v[0-9]+]], 3855
; CHECK: vn %v24, %v24, [[REG]]
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i32>
%ret = zext <2 x i32> %trunc to <2 x i64>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,48 @@
; Test the handling of named vector arguments.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
; This routine has 6 integer arguments, which fill up r2-r6 and
; the stack slot at offset 160, and 10 vector arguments, which
; fill up v24-v31 and the two double-wide stack slots at 168
; and 184.
declare void @bar(i64, i64, i64, i64, i64, i64,
<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
<4 x i32>, <4 x i32>)
; Call bar with distinct constants so that each argument can be traced.
; The first set of checks pins the vector register order for the first
; eight vector arguments (v24, v26, v28, v30, then v25, v27, v29, v31).
; The second set pins the stack layout: a 200-byte frame, the sixth
; integer at offset 160, and the ninth and tenth vectors stored at 168
; and 184.
define void @foo() {
; CHECK-VEC-LABEL: foo:
; CHECK-VEC-DAG: vrepif %v24, 1
; CHECK-VEC-DAG: vrepif %v26, 2
; CHECK-VEC-DAG: vrepif %v28, 3
; CHECK-VEC-DAG: vrepif %v30, 4
; CHECK-VEC-DAG: vrepif %v25, 5
; CHECK-VEC-DAG: vrepif %v27, 6
; CHECK-VEC-DAG: vrepif %v29, 7
; CHECK-VEC-DAG: vrepif %v31, 8
; CHECK-VEC: brasl %r14, bar@PLT
;
; CHECK-STACK-LABEL: foo:
; CHECK-STACK: aghi %r15, -200
; CHECK-STACK-DAG: mvghi 160(%r15), 6
; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 9
; CHECK-STACK-DAG: vst [[REG1]], 168(%r15)
; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 10
; CHECK-STACK-DAG: vst [[REG2]], 184(%r15)
; CHECK-STACK: brasl %r14, bar@PLT
call void @bar (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 2, i32 2, i32 2, i32 2>,
<4 x i32> <i32 3, i32 3, i32 3, i32 3>,
<4 x i32> <i32 4, i32 4, i32 4, i32 4>,
<4 x i32> <i32 5, i32 5, i32 5, i32 5>,
<4 x i32> <i32 6, i32 6, i32 6, i32 6>,
<4 x i32> <i32 7, i32 7, i32 7, i32 7>,
<4 x i32> <i32 8, i32 8, i32 8, i32 8>,
<4 x i32> <i32 9, i32 9, i32 9, i32 9>,
<4 x i32> <i32 10, i32 10, i32 10, i32 10>)
ret void
}

View File

@ -0,0 +1,31 @@
; Test the handling of unnamed vector arguments.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
; This routine is called with two named vector arguments (passed
; in %v24 and %v26) and two unnamed vector arguments (passed
; in the double-wide stack slots at 160 and 176).
declare void @bar(<4 x i32>, <4 x i32>, ...)
; Call the variadic bar with distinct constants so that each argument can
; be traced.  The first set of checks pins the two named vectors to %v24
; and %v26.  The second set pins the stack layout for the two unnamed
; vectors: a 192-byte frame with 16-byte stores at offsets 160 and 176.
define void @foo() {
; CHECK-VEC-LABEL: foo:
; CHECK-VEC-DAG: vrepif %v24, 1
; CHECK-VEC-DAG: vrepif %v26, 2
; CHECK-VEC: brasl %r14, bar@PLT
;
; CHECK-STACK-LABEL: foo:
; CHECK-STACK: aghi %r15, -192
; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 3
; CHECK-STACK-DAG: vst [[REG1]], 160(%r15)
; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 4
; CHECK-STACK-DAG: vst [[REG2]], 176(%r15)
; CHECK-STACK: brasl %r14, bar@PLT
call void (<4 x i32>, <4 x i32>, ...) @bar
(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 2, i32 2, i32 2, i32 2>,
<4 x i32> <i32 3, i32 3, i32 3, i32 3>,
<4 x i32> <i32 4, i32 4, i32 4, i32 4>)
ret void
}

View File

@ -0,0 +1,16 @@
; Test the handling of incoming vector arguments.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; This routine has 10 vector arguments, which fill up %v24-%v31 and
; the two double-wide stack slots at 160 and 176.  Only the second
; argument (expected in %v26) and the tenth (expected to be loaded from
; offset 176) are used, so the checks cover one register argument and one
; stack argument.
define <4 x i32> @foo(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4,
<4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8,
<4 x i32> %v9, <4 x i32> %v10) {
; CHECK-LABEL: foo:
; CHECK: vl [[REG1:%v[0-9]+]], 176(%r15)
; CHECK: vsf %v24, %v26, [[REG1]]
; CHECK: br %r14
%y = sub <4 x i32> %v2, %v10
ret <4 x i32> %y
}

View File

@ -0,0 +1,228 @@
; Test v16i8 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test eq: the sign-extended compare result is expected to come straight
; from a single vceqb.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vceqb %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp eq <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test ne: expected as the eq compare followed by a vno of the result with
; itself to invert it.
define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vceqb [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test sgt: expected as a single vchb with the operands in source order.
define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vchb %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp sgt <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test sge: expected as the inverse of val2 > val1, i.e. a vchb with
; swapped operands followed by an inverting vno.
define <16 x i8> @f4(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vchb [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test sle: expected as the inverse of val1 > val2, i.e. a vchb in source
; order followed by an inverting vno.
define <16 x i8> @f5(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f5:
; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test slt: expected as val2 > val1, i.e. a single vchb with swapped
; operands.
define <16 x i8> @f6(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vchb %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp slt <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test ugt: expected as a single vchlb with the operands in source order.
define <16 x i8> @f7(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f7:
; CHECK: vchlb %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp ugt <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test uge: expected as the inverse of val2 > val1 (unsigned), i.e. a
; vchlb with swapped operands followed by an inverting vno.
define <16 x i8> @f8(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f8:
; CHECK: vchlb [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test ule: expected as the inverse of val1 > val2 (unsigned), i.e. a
; vchlb in source order followed by an inverting vno.
define <16 x i8> @f9(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f9:
; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test ult: expected as val2 > val1 (unsigned), i.e. a single vchlb with
; swapped operands.
define <16 x i8> @f10(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f10:
; CHECK: vchlb %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp ult <16 x i8> %val1, %val2
%ret = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %ret
}
; Test eq selects: the vceqb result is expected to be used directly as the
; vsel mask, with val3 (%v28) and val4 (%v30) in source order.
define <16 x i8> @f11(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f11:
; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp eq <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test ne selects: rather than inverting the mask, the eq compare is
; expected to be reused with the two vsel data operands swapped.
define <16 x i8> @f12(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f12:
; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test sgt selects: expected as a vchb in source order feeding a vsel with
; the data operands in source order.
define <16 x i8> @f13(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f13:
; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sgt <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test sge selects: expected as val2 > val1 (swapped vchb) with the vsel
; data operands swapped, so no separate inversion is needed.
define <16 x i8> @f14(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f14:
; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test sle selects: expected as val1 > val2 (vchb in source order) with the
; vsel data operands swapped, so no separate inversion is needed.
define <16 x i8> @f15(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f15:
; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test slt selects: expected as val2 > val1 (swapped vchb) with the vsel
; data operands in source order.
define <16 x i8> @f16(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f16:
; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp slt <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test ugt selects: expected as a vchlb in source order feeding a vsel with
; the data operands in source order.
define <16 x i8> @f17(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f17:
; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ugt <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test uge selects: expected as val2 > val1 unsigned (swapped vchlb) with
; the vsel data operands swapped, so no separate inversion is needed.
define <16 x i8> @f18(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f18:
; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test ule selects: expected as val1 > val2 unsigned (vchlb in source
; order) with the vsel data operands swapped, so no separate inversion is
; needed.
define <16 x i8> @f19(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f19:
; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}
; Test ult selects: expected as val2 > val1 unsigned (swapped vchlb) with
; the vsel data operands in source order.
define <16 x i8> @f20(<16 x i8> %val1, <16 x i8> %val2,
<16 x i8> %val3, <16 x i8> %val4) {
; CHECK-LABEL: f20:
; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ult <16 x i8> %val1, %val2
%ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
ret <16 x i8> %ret
}

View File

@ -0,0 +1,228 @@
; Test v8i16 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test eq: the sign-extended compare result is expected to come straight
; from a single vceqh.
define <8 x i16> @f1(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f1:
; CHECK: vceqh %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp eq <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test ne: expected as the eq compare followed by a vno of the result with
; itself to invert it.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vceqh [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test sgt: expected as a single vchh with the operands in source order.
define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f3:
; CHECK: vchh %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp sgt <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test sge: expected as the inverse of val2 > val1, i.e. a vchh with
; swapped operands followed by an inverting vno.
define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f4:
; CHECK: vchh [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test sle: expected as the inverse of val1 > val2, i.e. a vchh in source
; order followed by an inverting vno.
define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f5:
; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test slt: expected as val2 > val1, i.e. a single vchh with swapped
; operands.
define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f6:
; CHECK: vchh %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp slt <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test ugt: expected as a single vchlh with the operands in source order.
define <8 x i16> @f7(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f7:
; CHECK: vchlh %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp ugt <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test uge: expected as the inverse of val2 > val1 (unsigned), i.e. a
; vchlh with swapped operands followed by an inverting vno.
define <8 x i16> @f8(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vchlh [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test ule: expected as the inverse of val1 > val2 (unsigned), i.e. a
; vchlh in source order followed by an inverting vno.
define <8 x i16> @f9(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f9:
; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test ult: expected as val2 > val1 (unsigned), i.e. a single vchlh with
; swapped operands.
define <8 x i16> @f10(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f10:
; CHECK: vchlh %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp ult <8 x i16> %val1, %val2
%ret = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ret
}
; Test eq selects: the vceqh result is expected to be used directly as the
; vsel mask, with val3 (%v28) and val4 (%v30) in source order.
define <8 x i16> @f11(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f11:
; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp eq <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test ne selects: rather than inverting the mask, the eq compare is
; expected to be reused with the two vsel data operands swapped.
define <8 x i16> @f12(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f12:
; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test sgt selects: expected as a vchh in source order feeding a vsel with
; the data operands in source order.
define <8 x i16> @f13(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f13:
; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sgt <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test sge selects: expected as val2 > val1 (swapped vchh) with the vsel
; data operands swapped, so no separate inversion is needed.
define <8 x i16> @f14(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f14:
; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test sle selects: expected as val1 > val2 (vchh in source order) with the
; vsel data operands swapped, so no separate inversion is needed.
define <8 x i16> @f15(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f15:
; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test slt selects: expected as val2 > val1 (swapped vchh) with the vsel
; data operands in source order.
define <8 x i16> @f16(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f16:
; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp slt <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test ugt selects: expected as a vchlh in source order feeding a vsel with
; the data operands in source order.
define <8 x i16> @f17(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f17:
; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ugt <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test uge selects: expected as val2 > val1 unsigned (swapped vchlh) with
; the vsel data operands swapped, so no separate inversion is needed.
define <8 x i16> @f18(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f18:
; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test ule selects: expected as val1 > val2 unsigned (vchlh in source
; order) with the vsel data operands swapped, so no separate inversion is
; needed.
define <8 x i16> @f19(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f19:
; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}
; Test ult selects: expected as val2 > val1 unsigned (swapped vchlh) with
; the vsel data operands in source order.
define <8 x i16> @f20(<8 x i16> %val1, <8 x i16> %val2,
<8 x i16> %val3, <8 x i16> %val4) {
; CHECK-LABEL: f20:
; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ult <8 x i16> %val1, %val2
%ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
ret <8 x i16> %ret
}

View File

@ -0,0 +1,228 @@
; Test v4i32 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test eq: the sign-extended compare result is expected to come straight
; from a single vceqf.
define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f1:
; CHECK: vceqf %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp eq <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test ne: expected as the eq compare followed by a vno of the result with
; itself to invert it.
define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f2:
; CHECK: vceqf [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test sgt: expected as a single vchf with the operands in source order.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vchf %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp sgt <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test sge: expected as the inverse of val2 > val1, i.e. a vchf with
; swapped operands followed by an inverting vno.
define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f4:
; CHECK: vchf [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test sle: expected as the inverse of val1 > val2, i.e. a vchf in source
; order followed by an inverting vno.
define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f5:
; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test slt: expected as val2 > val1, i.e. a single vchf with swapped
; operands.
define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f6:
; CHECK: vchf %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp slt <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test ugt: expected as a single vchlf with the operands in source order.
define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f7:
; CHECK: vchlf %v24, %v26, %v28
; CHECK-NEXT: br %r14
%cmp = icmp ugt <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test uge: expected as the inverse of val2 > val1 (unsigned), i.e. a
; vchlf with swapped operands followed by an inverting vno.
define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f8:
; CHECK: vchlf [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test ule: expected as the inverse of val1 > val2 (unsigned), i.e. a
; vchlf in source order followed by an inverting vno.
define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f9:
; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test ult: expected as val2 > val1 (unsigned), i.e. a single vchlf with
; swapped operands.
define <4 x i32> @f10(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f10:
; CHECK: vchlf %v24, %v28, %v26
; CHECK-NEXT: br %r14
%cmp = icmp ult <4 x i32> %val1, %val2
%ret = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; Test eq selects: the vceqf result is expected to be used directly as the
; vsel mask, with val3 (%v28) and val4 (%v30) in source order.
define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f11:
; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp eq <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test ne selects: rather than inverting the mask, the eq compare is
; expected to be reused with the two vsel data operands swapped.
define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f12:
; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ne <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test sgt selects: expected as a vchf in source order feeding a vsel with
; the data operands in source order.
define <4 x i32> @f13(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f13:
; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sgt <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test sge selects: expected as val2 > val1 (swapped vchf) with the vsel
; data operands swapped, so no separate inversion is needed.
define <4 x i32> @f14(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f14:
; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sge <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test sle selects: expected as val1 > val2 (vchf in source order) with the
; vsel data operands swapped, so no separate inversion is needed.
define <4 x i32> @f15(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f15:
; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp sle <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test slt selects: expected as val2 > val1 (swapped vchf) with the vsel
; data operands in source order.
define <4 x i32> @f16(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f16:
; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp slt <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test ugt selects: expected as a vchlf in source order feeding a vsel with
; the data operands in source order.
define <4 x i32> @f17(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f17:
; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ugt <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test uge selects: expected as val2 > val1 unsigned (swapped vchlf) with
; the vsel data operands swapped, so no separate inversion is needed.
define <4 x i32> @f18(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f18:
; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp uge <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test ule selects: expected as val1 > val2 unsigned (vchlf in source
; order) with the vsel data operands swapped, so no separate inversion is
; needed.
define <4 x i32> @f19(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f19:
; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ule <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}
; Test ult selects: expected as val2 > val1 unsigned (swapped vchlf) with
; the vsel data operands in source order.
define <4 x i32> @f20(<4 x i32> %val1, <4 x i32> %val2,
<4 x i32> %val3, <4 x i32> %val4) {
; CHECK-LABEL: f20:
; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
%cmp = icmp ult <4 x i32> %val1, %val2
%ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
ret <4 x i32> %ret
}

View File

@ -0,0 +1,228 @@
; Test v2i64 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Equality compare, sign-extended to a full-width element mask.  A single
; VCEQG writing the return register is expected.  %dummy occupies %v24 so
; that the compared values arrive in %v26 and %v28.
define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f1:
; CHECK: vceqg %v24, %v26, %v28
; CHECK-NEXT: br %r14
  %eq = icmp eq <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %eq to <2 x i64>
  ret <2 x i64> %ext
}
; Inequality compare, sign-extended to a full-width element mask.  The
; expected code compares for equality with VCEQG and then inverts the
; result by applying VNO to the mask and itself.
define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f2:
; CHECK: vceqg [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
  %ne = icmp ne <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %ne to <2 x i64>
  ret <2 x i64> %ext
}
; Signed greater-than compare, sign-extended to a full-width element
; mask.  A single VCHG with the operands in source order is expected.
define <2 x i64> @f3(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f3:
; CHECK: vchg %v24, %v26, %v28
; CHECK-NEXT: br %r14
  %sgt = icmp sgt <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %sgt to <2 x i64>
  ret <2 x i64> %ext
}
; Signed greater-or-equal compare, sign-extended to a full-width element
; mask.  val1 >= val2 is the inverse of val2 > val1, so the expected code
; is a VCHG with swapped operands followed by a VNO of the result with
; itself.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vchg [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
  %sge = icmp sge <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %sge to <2 x i64>
  ret <2 x i64> %ext
}
; Signed less-or-equal compare, sign-extended to a full-width element
; mask.  val1 <= val2 is the inverse of val1 > val2, so the expected code
; is a VCHG in source operand order followed by a VNO of the result with
; itself.
define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f5:
; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
  %sle = icmp sle <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %sle to <2 x i64>
  ret <2 x i64> %ext
}
; Signed less-than compare, sign-extended to a full-width element mask.
; val1 < val2 is expected to become a single VCHG with the operands
; swapped (%v28, %v26).
define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f6:
; CHECK: vchg %v24, %v28, %v26
; CHECK-NEXT: br %r14
  %slt = icmp slt <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %slt to <2 x i64>
  ret <2 x i64> %ext
}
; Unsigned greater-than compare, sign-extended to a full-width element
; mask.  A single VCHLG with the operands in source order is expected.
define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f7:
; CHECK: vchlg %v24, %v26, %v28
; CHECK-NEXT: br %r14
  %ugt = icmp ugt <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %ugt to <2 x i64>
  ret <2 x i64> %ext
}
; Unsigned greater-or-equal compare, sign-extended to a full-width
; element mask.  val1 >= val2 is the inverse of val2 > val1, so the
; expected code is a VCHLG with swapped operands followed by a VNO of the
; result with itself.
define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f8:
; CHECK: vchlg [[REG:%v[0-9]+]], %v28, %v26
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
  %uge = icmp uge <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %uge to <2 x i64>
  ret <2 x i64> %ext
}
; Unsigned less-or-equal compare, sign-extended to a full-width element
; mask.  val1 <= val2 is the inverse of val1 > val2, so the expected code
; is a VCHLG in source operand order followed by a VNO of the result with
; itself.
define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f9:
; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v28
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
; CHECK-NEXT: br %r14
  %ule = icmp ule <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %ule to <2 x i64>
  ret <2 x i64> %ext
}
; Unsigned less-than compare, sign-extended to a full-width element mask.
; val1 < val2 is expected to become a single VCHLG with the operands
; swapped (%v28, %v26).
define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f10:
; CHECK: vchlg %v24, %v28, %v26
; CHECK-NEXT: br %r14
  %ult = icmp ult <2 x i64> %val1, %val2
  %ext = sext <2 x i1> %ult to <2 x i64>
  ret <2 x i64> %ext
}
; Select on an equality compare.  The expected code is a VCEQG of the two
; compared values followed by a VSEL that keeps %val3/%val4 in their
; original order.
define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f11:
; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
  %eq = icmp eq <2 x i64> %val1, %val2
  %sel = select <2 x i1> %eq, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on an inequality compare.  Instead of inverting the mask, the
; expected code reuses VCEQG and exchanges the two VSEL data inputs
; (%v30, %v28).
define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f12:
; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
  %ne = icmp ne <2 x i64> %val1, %val2
  %sel = select <2 x i1> %ne, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on a signed greater-than compare.  This maps directly onto VCHG
; with the operands in source order, followed by a VSEL that keeps
; %val3/%val4 in their original order.
define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f13:
; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
  %sgt = icmp sgt <2 x i64> %val1, %val2
  %sel = select <2 x i1> %sgt, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on a signed greater-or-equal compare.  val1 >= val2 is the
; inverse of val2 > val1, so the expected code is a VCHG with swapped
; operands and a VSEL whose two data inputs are exchanged (%v30, %v28).
define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f14:
; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
  %sge = icmp sge <2 x i64> %val1, %val2
  %sel = select <2 x i1> %sge, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on a signed less-or-equal compare.  val1 <= val2 is the inverse
; of val1 > val2, so the expected code is a VCHG in source operand order
; and a VSEL whose two data inputs are exchanged (%v30, %v28).
define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f15:
; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
  %sle = icmp sle <2 x i64> %val1, %val2
  %sel = select <2 x i1> %sle, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on a signed less-than compare.  val1 < val2 is expected to be
; computed as a VCHG with the operands swapped (%v26, %v24), so the VSEL
; keeps %val3/%val4 in their original order.
define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f16:
; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
  %slt = icmp slt <2 x i64> %val1, %val2
  %sel = select <2 x i1> %slt, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on an unsigned greater-than compare.  This maps directly onto
; VCHLG with the operands in source order, followed by a VSEL that keeps
; %val3/%val4 in their original order.
define <2 x i64> @f17(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f17:
; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
  %ugt = icmp ugt <2 x i64> %val1, %val2
  %sel = select <2 x i1> %ugt, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on an unsigned greater-or-equal compare.  val1 >= val2 is the
; inverse of val2 > val1, so the expected code is a VCHLG with swapped
; operands and a VSEL whose two data inputs are exchanged (%v30, %v28).
define <2 x i64> @f18(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f18:
; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
  %uge = icmp uge <2 x i64> %val1, %val2
  %sel = select <2 x i1> %uge, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on an unsigned less-or-equal compare.  val1 <= val2 is the
; inverse of val1 > val2, so the expected code is a VCHLG in source
; operand order and a VSEL whose two data inputs are exchanged
; (%v30, %v28).
define <2 x i64> @f19(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f19:
; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
; CHECK-NEXT: br %r14
  %ule = icmp ule <2 x i64> %val1, %val2
  %sel = select <2 x i1> %ule, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}
; Select on an unsigned less-than compare.  val1 < val2 is expected to be
; computed as a VCHLG with the operands swapped (%v26, %v24), so the VSEL
; keeps %val3/%val4 in their original order.
define <2 x i64> @f20(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3,
                      <2 x i64> %val4) {
; CHECK-LABEL: f20:
; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
; CHECK-NEXT: br %r14
  %ult = icmp ult <2 x i64> %val1, %val2
  %sel = select <2 x i1> %ult, <2 x i64> %val3, <2 x i64> %val4
  ret <2 x i64> %sel
}

View File

@ -0,0 +1,107 @@
; Test various target-specific DAG combiner patterns.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; An extractelement followed by a trunc should be handled like a bitcast:
; the low byte of each selected i32 element is expected to be stored
; straight from the vector register with VSTEB (byte 3 for element 0,
; byte 15 for element 3), without first moving the element to a GPR.
define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
; CHECK: br %r14
  %sum = add <4 x i32> %v1, %v2
  %first = extractelement <4 x i32> %sum, i32 0
  %last = extractelement <4 x i32> %sum, i32 3
  %firstlo = trunc i32 %first to i8
  %lastlo = trunc i32 %last to i8
  store i8 %firstlo, i8 *%ptr1
  store i8 %lastlo, i8 *%ptr2
  ret void
}
; A pack-type shuffle (odd-numbered words of both inputs) whose result is
; only read through two halfword extracts should disappear: no VPK is
; expected, and each extract should read directly from the add that
; produced it (halfword 3 of the first sum, halfword 7 of the second).
define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: f2:
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
; CHECK: br %r14
  %sum12 = add <4 x i32> %v1, %v2
  %sum23 = add <4 x i32> %v2, %v3
  %packed = shufflevector <4 x i32> %sum12, <4 x i32> %sum23,
                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %halves = bitcast <4 x i32> %packed to <8 x i16>
  %h1 = extractelement <8 x i16> %halves, i32 1
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h1, %h7
  ret i16 %total
}
; Same pack-type shuffle as f2, but the second shuffle input is a splat of
; doubleword 0 of %v3 bitcast to words.  Neither VREPG nor VPK is
; expected: one extract should read halfword 6 of the sum and the other
; halfword 3 of the original %v28.
define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f3:
; CHECK-NOT: vrepg
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef,
                       <2 x i32> <i32 0, i32 0>
  %dupwords = bitcast <2 x i64> %dup to <4 x i32>
  %packed = shufflevector <4 x i32> %sum, <4 x i32> %dupwords,
                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %halves = bitcast <4 x i32> %packed to <8 x i16>
  %h2 = extractelement <8 x i16> %halves, i32 2
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h2, %h7
  ret i16 %total
}
; Same idea as f3, but the outer shuffle is a merge-low pattern
; (words 2, 6, 3, 7) rather than a pack.  Neither VREPG nor any VMR*
; instruction is expected: the extracts should read halfword 6 of the sum
; and halfword 3 of the original %v28.
define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f4:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef,
                       <2 x i32> <i32 0, i32 0>
  %dupwords = bitcast <2 x i64> %dup to <4 x i32>
  %merged = shufflevector <4 x i32> %sum, <4 x i32> %dupwords,
                          <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %halves = bitcast <4 x i32> %merged to <8 x i16>
  %h4 = extractelement <8 x i16> %halves, i32 4
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h4, %h7
  ret i16 %total
}
; Same idea as f4, but the outer shuffle is a merge-high pattern
; (words 0, 4, 1, 5).  Neither VREPG nor any VMR* instruction is
; expected: the extracts should read halfword 2 of the sum and halfword 3
; of the original %v28.
define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f5:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef,
                       <2 x i32> <i32 0, i32 0>
  %dupwords = bitcast <2 x i64> %dup to <4 x i32>
  %merged = shufflevector <4 x i32> %sum, <4 x i32> %dupwords,
                          <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %halves = bitcast <4 x i32> %merged to <8 x i16>
  %h4 = extractelement <8 x i16> %halves, i32 4
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h4, %h7
  ret i16 %total
}

View File

@ -0,0 +1,55 @@
; Test vector byte masks, v16i8 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.  It is expected to be materialized with a
; single VGBM whose byte mask is 0.
define <16 x i8> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <16 x i8> zeroinitializer
}
; Test an all-ones vector.  All 16 bytes are -1, so the expected VGBM
; byte mask has all 16 bits set (65535 = 0xffff).
define <16 x i8> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgbm %v24, 65535
; CHECK: br %r14
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1>
}
; Test a mixed vector (mask 0x8c75 = 35957).  Reading the 16 bytes from
; first to last, each -1 byte contributes a 1 bit and each 0 byte a 0 bit,
; most significant bit first: 1000 1100 0111 0101.
define <16 x i8> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgbm %v24, 35957
; CHECK: br %r14
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
i8 -1, i8 -1, i8 0, i8 0,
i8 0, i8 -1, i8 -1, i8 -1,
i8 0, i8 -1, i8 0, i8 -1>
}
; Test that undefs are treated as zero.  This is the f3 vector with every
; 0 byte replaced by undef; the same mask (35957 = 0x8c75) is expected.
define <16 x i8> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgbm %v24, 35957
; CHECK: br %r14
ret <16 x i8> <i8 -1, i8 undef, i8 undef, i8 undef,
i8 -1, i8 -1, i8 undef, i8 undef,
i8 undef, i8 -1, i8 -1, i8 -1,
i8 undef, i8 -1, i8 undef, i8 -1>
}
; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
; This is the f3 vector with byte 7 changed from 0 to 1.
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK-NOT: vgbm
; CHECK: br %r14
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
i8 -1, i8 -1, i8 0, i8 1,
i8 0, i8 -1, i8 -1, i8 -1,
i8 0, i8 -1, i8 0, i8 -1>
}

View File

@ -0,0 +1,47 @@
; Test vector byte masks, v8i16 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.  It is expected to be materialized with a
; single VGBM whose byte mask is 0.
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <8 x i16> zeroinitializer
}
; Test an all-ones vector.  Every halfword is -1, so the expected VGBM
; byte mask has all 16 bits set (65535 = 0xffff).
define <8 x i16> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgbm %v24, 65535
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1>
}
; Test a mixed vector (mask 0x8c76 = 35958).  Each halfword contributes
; two mask bits, high byte first: 65280 = 0xff00 gives 10, 255 = 0x00ff
; gives 01, 65535 = 0xffff gives 11 and 0 gives 00.
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgbm %v24, 35958
; CHECK: br %r14
ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
i16 255, i16 65535, i16 255, i16 65280>
}
; Test that undefs are treated as zero.  This is the f3 vector with its
; two zero halfwords (elements 1 and 3) replaced by undef; the same mask
; (35958 = 0x8c76) is expected.
define <8 x i16> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgbm %v24, 35958
; CHECK: br %r14
ret <8 x i16> <i16 65280, i16 undef, i16 65535, i16 undef,
i16 255, i16 65535, i16 255, i16 65280>
}
; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
; This is the f3 vector with element 6 changed from 255 to 256 (0x0100),
; whose high byte is 0x01.
define <8 x i16> @f5() {
; CHECK-LABEL: f5:
; CHECK-NOT: vgbm
; CHECK: br %r14
ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
i16 255, i16 65535, i16 256, i16 65280>
}

View File

@ -0,0 +1,43 @@
; Test vector byte masks, v4i32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.  It is expected to be materialized with a
; single VGBM whose byte mask is 0.
define <4 x i32> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <4 x i32> zeroinitializer
}
; Test an all-ones vector.  Every word is -1, so the expected VGBM byte
; mask has all 16 bits set (65535 = 0xffff).
define <4 x i32> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgbm %v24, 65535
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
; Test a mixed vector (mask 0x8c76 = 35958).  Each word contributes one
; hex digit of the mask: 4278190080 = 0xff000000 gives 8,
; 4294901760 = 0xffff0000 gives c, 16777215 = 0x00ffffff gives 7 and
; 16776960 = 0x00ffff00 gives 6.
define <4 x i32> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgbm %v24, 35958
; CHECK: br %r14
ret <4 x i32> <i32 4278190080, i32 4294901760, i32 16777215, i32 16776960>
}
; Test that undefs are treated as zero (mask 0x8076 = 32886).  This is
; the f3 vector with element 1 replaced by undef, so the second hex digit
; of the mask drops from c to 0.
define <4 x i32> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgbm %v24, 32886
; CHECK: br %r14
ret <4 x i32> <i32 4278190080, i32 undef, i32 16777215, i32 16776960>
}
; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
; This is the f3 vector with element 1 replaced by 1, whose low byte is
; 0x01.
define <4 x i32> @f5() {
; CHECK-LABEL: f5:
; CHECK-NOT: vgbm
; CHECK: br %r14
ret <4 x i32> <i32 4278190080, i32 1, i32 16777215, i32 16776960>
}

View File

@ -0,0 +1,43 @@
; Test vector byte masks, v2i64 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.  It is expected to be materialized with a
; single VGBM whose byte mask is 0.
define <2 x i64> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <2 x i64> zeroinitializer
}
; Test an all-ones vector.  Both doublewords are -1, so the expected VGBM
; byte mask has all 16 bits set (65535 = 0xffff).
define <2 x i64> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgbm %v24, 65535
; CHECK: br %r14
ret <2 x i64> <i64 -1, i64 -1>
}
; Test a mixed vector (mask 0x8c76 = 35958).  Each doubleword contributes
; one byte of the mask: 18374686483966525440 = 0xff000000ffff0000 gives
; 0x8c and 72057589759737600 = 0x00ffffff00ffff00 gives 0x76.
define <2 x i64> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgbm %v24, 35958
; CHECK: br %r14
ret <2 x i64> <i64 18374686483966525440, i64 72057589759737600>
}
; Test that undefs are treated as zero (mask 0x8c00 = 35840).  This is
; the f3 vector with the second doubleword replaced by undef, so the low
; byte of the mask becomes 0.
define <2 x i64> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgbm %v24, 35840
; CHECK: br %r14
ret <2 x i64> <i64 18374686483966525440, i64 undef>
}
; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
; The first doubleword is the f3 value plus one (0xff000000ffff0001), so
; its lowest byte is 0x01.
define <2 x i64> @f5() {
; CHECK-LABEL: f5:
; CHECK-NOT: vgbm
; CHECK: br %r14
ret <2 x i64> <i64 18374686483966525441, i64 72057589759737600>
}

View File

@ -0,0 +1,229 @@
; Test vector replicates, v16i8 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a byte-granularity replicate with the lowest useful value.
; All 16 bytes are 1, so a VREPIB with immediate 1 is expected.
; NOTE(review): "useful" presumably excludes 0 because an all-zeros
; vector is covered by the VGBM byte-mask tests - confirm.
define <16 x i8> @f1() {
; CHECK-LABEL: f1:
; CHECK: vrepib %v24, 1
; CHECK: br %r14
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1>
}
; Test a byte-granularity replicate with an arbitrary value.
; The byte 201 (0xc9) is expected to be printed as the signed
; immediate -55.
define <16 x i8> @f2() {
; CHECK-LABEL: f2:
; CHECK: vrepib %v24, -55
; CHECK: br %r14
ret <16 x i8> <i8 201, i8 201, i8 201, i8 201,
i8 201, i8 201, i8 201, i8 201,
i8 201, i8 201, i8 201, i8 201,
i8 201, i8 201, i8 201, i8 201>
}
; Test a byte-granularity replicate with the highest useful value.
; The byte 254 (0xfe) is expected to be printed as the signed
; immediate -2.
; NOTE(review): "useful" presumably excludes 255 because an all-ones
; vector is covered by the VGBM byte-mask tests - confirm.
define <16 x i8> @f3() {
; CHECK-LABEL: f3:
; CHECK: vrepib %v24, -2
; CHECK: br %r14
ret <16 x i8> <i8 254, i8 254, i8 254, i8 254,
i8 254, i8 254, i8 254, i8 254,
i8 254, i8 254, i8 254, i8 254,
i8 254, i8 254, i8 254, i8 254>
}
; Test a halfword-granularity replicate with the lowest useful value.
; Each byte pair (0, 1) forms the halfword 0x0001, so a VREPIH with
; immediate 1 is expected.
define <16 x i8> @f4() {
; CHECK-LABEL: f4:
; CHECK: vrepih %v24, 1
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 1, i8 0, i8 1,
i8 0, i8 1, i8 0, i8 1,
i8 0, i8 1, i8 0, i8 1,
i8 0, i8 1, i8 0, i8 1>
}
; Test a halfword-granularity replicate with an arbitrary value.
; Each byte pair (100, 50) forms the halfword 0x6432 = 25650.
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK: vrepih %v24, 25650
; CHECK: br %r14
ret <16 x i8> <i8 100, i8 50, i8 100, i8 50,
i8 100, i8 50, i8 100, i8 50,
i8 100, i8 50, i8 100, i8 50,
i8 100, i8 50, i8 100, i8 50>
}
; Test a halfword-granularity replicate with the highest useful value.
; Each byte pair (255, 254) forms the halfword 0xfffe, which is expected
; to be printed as the signed immediate -2.
define <16 x i8> @f6() {
; CHECK-LABEL: f6:
; CHECK: vrepih %v24, -2
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 254, i8 255, i8 254,
i8 255, i8 254, i8 255, i8 254,
i8 255, i8 254, i8 255, i8 254,
i8 255, i8 254, i8 255, i8 254>
}
; Test a word-granularity replicate with the lowest useful positive value.
; Each byte group (0, 0, 0, 1) forms the word 0x00000001.
define <16 x i8> @f7() {
; CHECK-LABEL: f7:
; CHECK: vrepif %v24, 1
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
i8 0, i8 0, i8 0, i8 1,
i8 0, i8 0, i8 0, i8 1,
i8 0, i8 0, i8 0, i8 1>
}
; Test a word-granularity replicate with the highest in-range value.
; Each byte group (0, 0, 127, 255) forms the word 0x00007fff = 32767.
define <16 x i8> @f8() {
; CHECK-LABEL: f8:
; CHECK: vrepif %v24, 32767
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 127, i8 255,
i8 0, i8 0, i8 127, i8 255,
i8 0, i8 0, i8 127, i8 255,
i8 0, i8 0, i8 127, i8 255>
}
; Test a word-granularity replicate with the next highest value.
; This cannot use VREPIF.
; Each byte group (0, 0, 128, 0) forms the word 0x00008000 = 32768, one
; above the 32767 limit exercised by f8.
define <16 x i8> @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0>
}
; Test a word-granularity replicate with the lowest in-range value.
; Each byte group (255, 255, 128, 0) forms the word 0xffff8000 = -32768.
define <16 x i8> @f10() {
; CHECK-LABEL: f10:
; CHECK: vrepif %v24, -32768
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 128, i8 0,
i8 255, i8 255, i8 128, i8 0,
i8 255, i8 255, i8 128, i8 0,
i8 255, i8 255, i8 128, i8 0>
}
; Test a word-granularity replicate with the next lowest value.
; This cannot use VREPIF.
; Each byte group (255, 255, 127, 255) forms the word
; 0xffff7fff = -32769, one below the -32768 limit exercised by f10.
define <16 x i8> @f11() {
; CHECK-LABEL: f11:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255>
}
; Test a word-granularity replicate with the highest useful negative value.
; Each byte group (255, 255, 255, 254) forms the word 0xfffffffe = -2.
define <16 x i8> @f12() {
; CHECK-LABEL: f12:
; CHECK: vrepif %v24, -2
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 255, i8 254,
i8 255, i8 255, i8 255, i8 254,
i8 255, i8 255, i8 255, i8 254,
i8 255, i8 255, i8 255, i8 254>
}
; Test a doubleword-granularity replicate with the lowest useful positive
; value.
; Each group of eight bytes is seven zeros followed by 1, i.e. the
; doubleword 1.
define <16 x i8> @f13() {
; CHECK-LABEL: f13:
; CHECK: vrepig %v24, 1
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 1,
i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 1>
}
; Test a doubleword-granularity replicate with the highest in-range value.
; Each group of eight bytes ends in (127, 255) with zeros above it, i.e.
; the doubleword 0x7fff = 32767.
define <16 x i8> @f14() {
; CHECK-LABEL: f14:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 127, i8 255,
i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 127, i8 255>
}
; Test a doubleword-granularity replicate with the next highest value.
; This cannot use VREPIG.
; Each group of eight bytes forms the doubleword 0x8000 = 32768, one
; above the 32767 limit exercised by f14.
define <16 x i8> @f15() {
; CHECK-LABEL: f15:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 128, i8 0>
}
; Test a doubleword-granularity replicate with the lowest in-range value.
; Each group of eight bytes ends in (128, 0) with 255s above it, i.e. the
; doubleword -32768.
define <16 x i8> @f16() {
; CHECK-LABEL: f16:
; CHECK: vrepig %v24, -32768
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 128, i8 0,
i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 128, i8 0>
}
; Test a doubleword-granularity replicate with the next lowest value.
; This cannot use VREPIG.
; Each group of eight bytes ends in (127, 255) with 255s above it, i.e.
; the doubleword -32769, one below the -32768 limit exercised by f16.
define <16 x i8> @f17() {
; CHECK-LABEL: f17:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 127, i8 255>
}
; Test a doubleword-granularity replicate with the highest useful negative
; value.
; Each group of eight bytes is seven 255s followed by 254, i.e. the
; doubleword -2.
define <16 x i8> @f18() {
; CHECK-LABEL: f18:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 255, i8 254,
i8 255, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 255, i8 254>
}
; Repeat f14 with undefs optimistically treated as 0.
; Bytes 1, 8 and 10, which are 0 in f14, are undef here; the same VREPIG
; immediate is still expected.
define <16 x i8> @f19() {
; CHECK-LABEL: f19:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 undef, i8 0, i8 0,
i8 0, i8 0, i8 127, i8 255,
i8 undef, i8 0, i8 undef, i8 0,
i8 0, i8 0, i8 127, i8 255>
}
; Repeat f18 with undefs optimistically treated as -1.
; Bytes 0, 6, 11 and 13, which are 255 in f18, are undef here; the same
; VREPIG immediate is still expected.
define <16 x i8> @f20() {
; CHECK-LABEL: f20:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <16 x i8> <i8 undef, i8 255, i8 255, i8 255,
i8 255, i8 255, i8 undef, i8 254,
i8 255, i8 255, i8 255, i8 undef,
i8 255, i8 undef, i8 255, i8 254>
}

View File

@ -0,0 +1,189 @@
; Test vector replicates, v8i16 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a byte-granularity replicate with the lowest useful value.
; Every halfword is 257 = 0x0101, i.e. the byte 1 repeated, so the
; narrower VREPIB form is expected.
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK: vrepib %v24, 1
; CHECK: br %r14
ret <8 x i16> <i16 257, i16 257, i16 257, i16 257,
i16 257, i16 257, i16 257, i16 257>
}
; Test a byte-granularity replicate with an arbitrary value.
; Every halfword is 51657 = 0xc9c9, i.e. the byte 0xc9 repeated, which is
; expected to be printed as the signed immediate -55.
define <8 x i16> @f2() {
; CHECK-LABEL: f2:
; CHECK: vrepib %v24, -55
; CHECK: br %r14
ret <8 x i16> <i16 51657, i16 51657, i16 51657, i16 51657,
i16 51657, i16 51657, i16 51657, i16 51657>
}
; Test a byte-granularity replicate with the highest useful value.
; Every halfword is -258 = 0xfefe, i.e. the byte 0xfe repeated, which is
; expected to be printed as the signed immediate -2.
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK: vrepib %v24, -2
; CHECK: br %r14
ret <8 x i16> <i16 -258, i16 -258, i16 -258, i16 -258,
i16 -258, i16 -258, i16 -258, i16 -258>
}
; Test a halfword-granularity replicate with the lowest useful value.
; Every halfword is 1, so a VREPIH with immediate 1 is expected.
define <8 x i16> @f4() {
; CHECK-LABEL: f4:
; CHECK: vrepih %v24, 1
; CHECK: br %r14
ret <8 x i16> <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
}
; Test a halfword-granularity replicate with an arbitrary value.
; Every halfword is 25650 (0x6432), whose two bytes differ, so the
; halfword form VREPIH is expected.
define <8 x i16> @f5() {
; CHECK-LABEL: f5:
; CHECK: vrepih %v24, 25650
; CHECK: br %r14
ret <8 x i16> <i16 25650, i16 25650, i16 25650, i16 25650,
i16 25650, i16 25650, i16 25650, i16 25650>
}
; Test a halfword-granularity replicate with the highest useful value.
; Every halfword is 65534 = 0xfffe, which is expected to be printed as
; the signed immediate -2.
define <8 x i16> @f6() {
; CHECK-LABEL: f6:
; CHECK: vrepih %v24, -2
; CHECK: br %r14
ret <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534,
i16 65534, i16 65534, i16 65534, i16 65534>
}
; Test a word-granularity replicate with the lowest useful positive value.
; Each halfword pair (0, 1) forms the word 1.
define <8 x i16> @f7() {
; CHECK-LABEL: f7:
; CHECK: vrepif %v24, 1
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 1, i16 0, i16 1,
i16 0, i16 1, i16 0, i16 1>
}
; Test a word-granularity replicate with the highest in-range value.
; Each halfword pair (0, 32767) forms the word 0x00007fff = 32767.
define <8 x i16> @f8() {
; CHECK-LABEL: f8:
; CHECK: vrepif %v24, 32767
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 32767, i16 0, i16 32767,
i16 0, i16 32767, i16 0, i16 32767>
}
; Test a word-granularity replicate with the next highest value.
; This cannot use VREPIF.
; Each halfword pair (0, 32768) forms the word 0x00008000 = 32768, one
; above the 32767 limit exercised by f8.
define <8 x i16> @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
i16 0, i16 32768, i16 0, i16 32768>
}
; Test a word-granularity replicate with the lowest in-range value.
; Each halfword pair (-1, -32768) forms the word 0xffff8000 = -32768.
define <8 x i16> @f10() {
; CHECK-LABEL: f10:
; CHECK: vrepif %v24, -32768
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -32768, i16 -1, i16 -32768,
i16 -1, i16 -32768, i16 -1, i16 -32768>
}
; Test a word-granularity replicate with the next lowest value.
; This cannot use VREPIF.
; NOTE(review): -32769 lies outside the signed i16 range; the test
; presumably relies on the IR parser wrapping it to 0x7fff so that each
; halfword pair forms the word 0xffff7fff = -32769 - confirm that the
; parser in use still accepts this literal.
define <8 x i16> @f11() {
; CHECK-LABEL: f11:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -32769, i16 -1, i16 -32769,
i16 -1, i16 -32769, i16 -1, i16 -32769>
}
; Test a word-granularity replicate with the highest useful negative value.
; Each halfword pair (-1, -2) forms the word 0xfffffffe = -2.
define <8 x i16> @f12() {
; CHECK-LABEL: f12:
; CHECK: vrepif %v24, -2
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -2, i16 -1, i16 -2,
i16 -1, i16 -2, i16 -1, i16 -2>
}
; Test a doubleword-granularity replicate with the lowest useful positive
; value.
; Each group of four halfwords (0, 0, 0, 1) forms the doubleword 1.
define <8 x i16> @f13() {
; CHECK-LABEL: f13:
; CHECK: vrepig %v24, 1
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 0, i16 0, i16 1,
i16 0, i16 0, i16 0, i16 1>
}
; Test a doubleword-granularity replicate with the highest in-range value.
; Each group of four halfwords (0, 0, 0, 32767) forms the doubleword
; 32767.
define <8 x i16> @f14() {
; CHECK-LABEL: f14:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 0, i16 0, i16 32767,
i16 0, i16 0, i16 0, i16 32767>
}
; Test a doubleword-granularity replicate with the next highest value.
; This cannot use VREPIG.
; Each group of four halfwords (0, 0, 0, 32768) forms the doubleword
; 32768, one above the 32767 limit exercised by f14.
define <8 x i16> @f15() {
; CHECK-LABEL: f15:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
i16 0, i16 0, i16 0, i16 32768>
}
; Test a doubleword-granularity replicate with the lowest in-range value.
; Each group of four halfwords (-1, -1, -1, -32768) forms the doubleword
; -32768.
define <8 x i16> @f16() {
; CHECK-LABEL: f16:
; CHECK: vrepig %v24, -32768
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32768,
i16 -1, i16 -1, i16 -1, i16 -32768>
}
; Test a doubleword-granularity replicate with the next lowest value.
; This cannot use VREPIG.
; NOTE(review): -32769 lies outside the signed i16 range; the test
; presumably relies on the IR parser wrapping it to 0x7fff so that each
; group of four halfwords forms the doubleword -32769 - confirm that the
; parser in use still accepts this literal.
define <8 x i16> @f17() {
; CHECK-LABEL: f17:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32769,
i16 -1, i16 -1, i16 -1, i16 -32769>
}
; Test a doubleword-granularity replicate with the highest useful negative
; value.
; Each group of four halfwords (-1, -1, -1, -2) forms the doubleword -2.
define <8 x i16> @f18() {
; CHECK-LABEL: f18:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -2,
i16 -1, i16 -1, i16 -1, i16 -2>
}
; Repeat f14 with undefs optimistically treated as 0.
; Elements 1, 4 and 6, which are 0 in f14, are undef here; the same
; VREPIG immediate is still expected.
define <8 x i16> @f19() {
; CHECK-LABEL: f19:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 undef, i16 0, i16 32767,
i16 undef, i16 0, i16 undef, i16 32767>
}
; Repeat f18 with undefs optimistically treated as -1.
; Elements 2, 4 and 5, which are -1 in f18, are undef here; the same
; VREPIG immediate is still expected.
define <8 x i16> @f20() {
; CHECK-LABEL: f20:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 undef, i16 -2,
i16 undef, i16 undef, i16 -1, i16 -2>
}

View File

@ -0,0 +1,169 @@
; Test vector replicates, v4i32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a byte-granularity replicate with the lowest useful value.
; Every word is 16843009 = 0x01010101, i.e. the byte 1 repeated, so the
; narrower VREPIB form is expected.
define <4 x i32> @f1() {
; CHECK-LABEL: f1:
; CHECK: vrepib %v24, 1
; CHECK: br %r14
ret <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
}
; Test a byte-granularity replicate with an arbitrary value.
; Every word is 3385444809 = 0xc9c9c9c9, i.e. the byte 0xc9 repeated,
; which is expected to be printed as the signed immediate -55.
define <4 x i32> @f2() {
; CHECK-LABEL: f2:
; CHECK: vrepib %v24, -55
; CHECK: br %r14
ret <4 x i32> <i32 3385444809, i32 3385444809, i32 3385444809, i32 3385444809>
}
; Test a byte-granularity replicate with the highest useful value.
; Every word is 4278124286 = 0xfefefefe, i.e. the byte 0xfe repeated,
; which is expected to be printed as the signed immediate -2.
define <4 x i32> @f3() {
; CHECK-LABEL: f3:
; CHECK: vrepib %v24, -2
; CHECK: br %r14
ret <4 x i32> <i32 4278124286, i32 4278124286, i32 4278124286, i32 4278124286>
}
; Test a halfword-granularity replicate with the lowest useful value.
; Every word is 65537 = 0x00010001, i.e. the halfword 1 repeated.
define <4 x i32> @f4() {
; CHECK-LABEL: f4:
; CHECK: vrepih %v24, 1
; CHECK: br %r14
ret <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>
}
; Test a halfword-granularity replicate with an arbitrary value.
; Every word is 1681024050 = 0x64326432, i.e. the halfword
; 0x6432 = 25650 repeated.
define <4 x i32> @f5() {
; CHECK-LABEL: f5:
; CHECK: vrepih %v24, 25650
; CHECK: br %r14
ret <4 x i32> <i32 1681024050, i32 1681024050, i32 1681024050, i32 1681024050>
}
; Test a halfword-granularity replicate with the highest useful value.
; Every word is -65538 = 0xfffefffe, i.e. the halfword 0xfffe repeated,
; which is expected to be printed as the signed immediate -2.
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK: vrepih %v24, -2
; CHECK: br %r14
ret <4 x i32> <i32 -65538, i32 -65538, i32 -65538, i32 -65538>
}
; Test a word-granularity replicate with the lowest useful positive value.
; Every word is 1, so a VREPIF with immediate 1 is expected.
define <4 x i32> @f7() {
; CHECK-LABEL: f7:
; CHECK: vrepif %v24, 1
; CHECK: br %r14
ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
}
; Test a word-granularity replicate with the highest in-range value.
; Every word is 32767 (0x7fff).
define <4 x i32> @f8() {
; CHECK-LABEL: f8:
; CHECK: vrepif %v24, 32767
; CHECK: br %r14
ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
}
; Test a word-granularity replicate with the next highest value.
; This cannot use VREPIF.
; Every word is 32768, one above the 32767 limit exercised by f8.
define <4 x i32> @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
}
; Test a word-granularity replicate with the lowest in-range value.
; Every word is -32768.
define <4 x i32> @f10() {
; CHECK-LABEL: f10:
; CHECK: vrepif %v24, -32768
; CHECK: br %r14
ret <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
}
; Test a word-granularity replicate with the next lowest value.
; This cannot use VREPIF.
; Every word is -32769, one below the -32768 limit exercised by f10.
define <4 x i32> @f11() {
; CHECK-LABEL: f11:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
}
; Test a word-granularity replicate with the highest useful negative value.
; Every word is -2, so a VREPIF with immediate -2 is expected.
define <4 x i32> @f12() {
; CHECK-LABEL: f12:
; CHECK: vrepif %v24, -2
; CHECK: br %r14
ret <4 x i32> <i32 -2, i32 -2, i32 -2, i32 -2>
}
; Test a doubleword-granularity replicate with the lowest useful positive
; value.
; Each word pair (0, 1) forms the doubleword 1.
define <4 x i32> @f13() {
; CHECK-LABEL: f13:
; CHECK: vrepig %v24, 1
; CHECK: br %r14
ret <4 x i32> <i32 0, i32 1, i32 0, i32 1>
}
; Test a doubleword-granularity replicate with the highest in-range value.
; Each word pair (0, 32767) forms the doubleword 32767.
define <4 x i32> @f14() {
; CHECK-LABEL: f14:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <4 x i32> <i32 0, i32 32767, i32 0, i32 32767>
}
; Test a doubleword-granularity replicate with the next highest value.
; This cannot use VREPIG.
; Each word pair (0, 32768) forms the doubleword 32768, one above the
; 32767 limit exercised by f14.
define <4 x i32> @f15() {
; CHECK-LABEL: f15:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
}
; Test a doubleword-granularity replicate with the lowest in-range value.
; Each word pair (-1, -32768) forms the doubleword -32768.
define <4 x i32> @f16() {
; CHECK-LABEL: f16:
; CHECK: vrepig %v24, -32768
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 -32768, i32 -1, i32 -32768>
}
; Test a doubleword-granularity replicate with the next lowest value.
; This cannot use VREPIG.
; Each word pair (-1, -32769) forms the doubleword -32769, one below the
; -32768 limit exercised by f16.
define <4 x i32> @f17() {
; CHECK-LABEL: f17:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 -32769, i32 -1, i32 -32769>
}
; Test a doubleword-granularity replicate with the highest useful negative
; value.
; Each word pair (-1, -2) forms the doubleword -2.
define <4 x i32> @f18() {
; CHECK-LABEL: f18:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 -2, i32 -1, i32 -2>
}
; Repeat f14 with undefs optimistically treated as 0, 32767.
; The whole first doubleword (elements 0 and 1) is undef, so it is
; expected to be assumed equal to the second one.
define <4 x i32> @f19() {
; CHECK-LABEL: f19:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <4 x i32> <i32 undef, i32 undef, i32 0, i32 32767>
}
; Repeat f18 with undefs optimistically treated as -2, -1.
; Element 1 (the low word of the first doubleword) and element 2 (the
; high word of the second) are undef; each is expected to take the value
; of the matching word in the other doubleword.
define <4 x i32> @f20() {
; CHECK-LABEL: f20:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 undef, i32 undef, i32 -2>
}

View File

@ -0,0 +1,169 @@
; Test vector replicates, v2i64 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a byte-granularity replicate with the lowest useful value.
; 72340172838076673 is 0x0101010101010101: the byte 0x01 in all eight byte
; positions of each element.
define <2 x i64> @f1() {
; CHECK-LABEL: f1:
; CHECK: vrepib %v24, 1
; CHECK: br %r14
ret <2 x i64> <i64 72340172838076673, i64 72340172838076673>
}
; Test a byte-granularity replicate with an arbitrary value.
; -3906369333256140343 is 0xc9c9c9c9c9c9c9c9; 0xc9 read as a signed byte is
; -55, the expected immediate.
define <2 x i64> @f2() {
; CHECK-LABEL: f2:
; CHECK: vrepib %v24, -55
; CHECK: br %r14
ret <2 x i64> <i64 -3906369333256140343, i64 -3906369333256140343>
}
; Test a byte-granularity replicate with the highest useful value.
; -72340172838076674 is 0xfefefefefefefefe; 0xfe read as a signed byte is -2.
define <2 x i64> @f3() {
; CHECK-LABEL: f3:
; CHECK: vrepib %v24, -2
; CHECK: br %r14
ret <2 x i64> <i64 -72340172838076674, i64 -72340172838076674>
}
; Test a halfword-granularity replicate with the lowest useful value.
; 281479271743489 is 0x0001000100010001: the halfword 0x0001 four times.
define <2 x i64> @f4() {
; CHECK-LABEL: f4:
; CHECK: vrepih %v24, 1
; CHECK: br %r14
ret <2 x i64> <i64 281479271743489, i64 281479271743489>
}
; Test a halfword-granularity replicate with an arbitrary value.
; 7219943320220492850 is 0x6432643264326432; the halfword 0x6432 is 25650.
define <2 x i64> @f5() {
; CHECK-LABEL: f5:
; CHECK: vrepih %v24, 25650
; CHECK: br %r14
ret <2 x i64> <i64 7219943320220492850, i64 7219943320220492850>
}
; Test a halfword-granularity replicate with the highest useful value.
; -281479271743490 is 0xfffefffefffefffe; 0xfffe read as a signed halfword
; is -2.
define <2 x i64> @f6() {
; CHECK-LABEL: f6:
; CHECK: vrepih %v24, -2
; CHECK: br %r14
ret <2 x i64> <i64 -281479271743490, i64 -281479271743490>
}
; Test a word-granularity replicate with the lowest useful positive value.
; 4294967297 is 0x0000000100000001: the word 1 in both halves of each element.
define <2 x i64> @f7() {
; CHECK-LABEL: f7:
; CHECK: vrepif %v24, 1
; CHECK: br %r14
ret <2 x i64> <i64 4294967297, i64 4294967297>
}
; Test a word-granularity replicate with the highest in-range value.
; 140733193420799 is 0x00007fff00007fff: the word 32767 in both halves of
; each element.
define <2 x i64> @f8() {
; CHECK-LABEL: f8:
; CHECK: vrepif %v24, 32767
; CHECK: br %r14
ret <2 x i64> <i64 140733193420799, i64 140733193420799>
}
; Test a word-granularity replicate with the next highest value.
; This cannot use VREPIF.
; 140737488388096 is 0x0000800000008000: the word 32768 in both halves, one
; above the value used in f8. The test only requires that no vrepif appears
; before the return.
define <2 x i64> @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <2 x i64> <i64 140737488388096, i64 140737488388096>
}
; Test a word-granularity replicate with the lowest in-range value.
; -140733193420800 is 0xffff8000ffff8000; 0xffff8000 read as a signed word
; is -32768.
define <2 x i64> @f10() {
; CHECK-LABEL: f10:
; CHECK: vrepif %v24, -32768
; CHECK: br %r14
ret <2 x i64> <i64 -140733193420800, i64 -140733193420800>
}
; Test a word-granularity replicate with the next lowest value.
; This cannot use VREPIF.
; -140737488388097 is 0xffff7fffffff7fff; 0xffff7fff read as a signed word is
; -32769, one below the value used in f10. The test only requires that no
; vrepif appears before the return.
define <2 x i64> @f11() {
; CHECK-LABEL: f11:
; CHECK-NOT: vrepif
; CHECK: br %r14
ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
}
; Test a word-granularity replicate with the highest useful negative value.
; -4294967298 is 0xfffffffefffffffe; 0xfffffffe read as a signed word is -2.
define <2 x i64> @f12() {
; CHECK-LABEL: f12:
; CHECK: vrepif %v24, -2
; CHECK: br %r14
ret <2 x i64> <i64 -4294967298, i64 -4294967298>
}
; Test a doubleword-granularity replicate with the lowest useful positive
; value.
; Both i64 elements are 1, so the element value is itself the immediate.
define <2 x i64> @f13() {
; CHECK-LABEL: f13:
; CHECK: vrepig %v24, 1
; CHECK: br %r14
ret <2 x i64> <i64 1, i64 1>
}
; Test a doubleword-granularity replicate with the highest in-range value.
; 32767 is 0x7fff, the largest value a signed 16-bit field can hold.
define <2 x i64> @f14() {
; CHECK-LABEL: f14:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <2 x i64> <i64 32767, i64 32767>
}
; Test a doubleword-granularity replicate with the next highest value.
; This cannot use VREPIG.
; 32768 (0x8000) is one above the value used in f14. The test only requires
; that no vrepig appears before the return.
define <2 x i64> @f15() {
; CHECK-LABEL: f15:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <2 x i64> <i64 32768, i64 32768>
}
; Test a doubleword-granularity replicate with the lowest in-range value.
; -32768 is the smallest value a signed 16-bit field can hold.
define <2 x i64> @f16() {
; CHECK-LABEL: f16:
; CHECK: vrepig %v24, -32768
; CHECK: br %r14
ret <2 x i64> <i64 -32768, i64 -32768>
}
; Test a doubleword-granularity replicate with the next lowest value.
; This cannot use VREPIG.
; -32769 is one below the value used in f16. The test only requires that no
; vrepig appears before the return.
define <2 x i64> @f17() {
; CHECK-LABEL: f17:
; CHECK-NOT: vrepig
; CHECK: br %r14
ret <2 x i64> <i64 -32769, i64 -32769>
}
; Test a doubleword-granularity replicate with the highest useful negative
; value.
; Both i64 elements are -2 (0xfffffffffffffffe).
define <2 x i64> @f18() {
; CHECK-LABEL: f18:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <2 x i64> <i64 -2, i64 -2>
}
; Repeat f14 with undefs optimistically treated as 32767.
; Element 0 is undef and element 1 is 32767; the expected code is the same
; replicate as in f14.
define <2 x i64> @f19() {
; CHECK-LABEL: f19:
; CHECK: vrepig %v24, 32767
; CHECK: br %r14
ret <2 x i64> <i64 undef, i64 32767>
}
; Repeat f18 with undefs optimistically treated as -2.
; Element 0 is undef and element 1 is -2; the expected code is the same
; replicate as in f18.
define <2 x i64> @f20() {
; CHECK-LABEL: f20:
; CHECK: vrepig %v24, -2
; CHECK: br %r14
ret <2 x i64> <i64 undef, i64 -2>
}

View File

@ -0,0 +1,193 @@
; Test vector replicates that use VECTOR GENERATE MASK, v16i8 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a word-granularity replicate with the lowest value that cannot use
; VREPIF.
; Each group of four bytes is 0x00008000 (32768). Numbering bits from 0 at the
; most significant end, only bit 16 is set, hence the expected 16, 16 range.
define <16 x i8> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 0>
}
; Test a word-granularity replicate that has the lower 17 bits set.
; Each group of four bytes is 0x0001ffff: bits 15 through 31 when bit 0 is
; the most significant bit.
define <16 x i8> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgmf %v24, 15, 31
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 1, i8 255, i8 255,
i8 0, i8 1, i8 255, i8 255,
i8 0, i8 1, i8 255, i8 255,
i8 0, i8 1, i8 255, i8 255>
}
; Test a word-granularity replicate that has the upper 15 bits set.
; Each group of four bytes is 0xfffe0000: bits 0 through 14 when bit 0 is the
; most significant bit.
define <16 x i8> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgmf %v24, 0, 14
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 254, i8 0, i8 0,
i8 255, i8 254, i8 0, i8 0,
i8 255, i8 254, i8 0, i8 0,
i8 255, i8 254, i8 0, i8 0>
}
; Test a word-granularity replicate that has middle bits set.
; Each group of four bytes is 0x000fc000: bits 12 through 17 when bit 0 is
; the most significant bit.
define <16 x i8> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgmf %v24, 12, 17
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 15, i8 192, i8 0,
i8 0, i8 15, i8 192, i8 0,
i8 0, i8 15, i8 192, i8 0,
i8 0, i8 15, i8 192, i8 0>
}
; Test a word-granularity replicate with a wrap-around mask.
; Each group of four bytes is 0xffff7fff: every bit except bit 16 (bit 0 being
; the most significant). The expected range starts at 17 and wraps round
; through bit 31 to end at 15.
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK: vgmf %v24, 17, 15
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255,
i8 255, i8 255, i8 127, i8 255>
}
; Test a doubleword-granularity replicate with the lowest value that cannot
; use VREPIG.
; Each group of eight bytes is 0x0000000000008000 (32768): only bit 48 is set
; when bit 0 is the most significant bit of the doubleword.
define <16 x i8> @f6() {
; CHECK-LABEL: f6:
; CHECK: vgmg %v24, 48, 48
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 128, i8 0,
i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 128, i8 0>
}
; Test a doubleword-granularity replicate that has the lower 22 bits set.
; Each group of eight bytes is 0x00000000003fffff: bits 42 through 63 when
; bit 0 is the most significant bit.
define <16 x i8> @f7() {
; CHECK-LABEL: f7:
; CHECK: vgmg %v24, 42, 63
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
i8 0, i8 63, i8 255, i8 255,
i8 0, i8 0, i8 0, i8 0,
i8 0, i8 63, i8 255, i8 255>
}
; Test a doubleword-granularity replicate that has the upper 45 bits set.
; Each group of eight bytes is 0xfffffffffff80000: bits 0 through 44 when
; bit 0 is the most significant bit.
define <16 x i8> @f8() {
; CHECK-LABEL: f8:
; CHECK: vgmg %v24, 0, 44
; CHECK: br %r14
ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
i8 255, i8 248, i8 0, i8 0,
i8 255, i8 255, i8 255, i8 255,
i8 255, i8 248, i8 0, i8 0>
}
; Test a doubleword-granularity replicate that has middle bits set.
; Each group of eight bytes is 0x00000001ffe00000: bits 31 through 42 when
; bit 0 is the most significant bit.
define <16 x i8> @f9() {
; CHECK-LABEL: f9:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
i8 255, i8 224, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 1,
i8 255, i8 224, i8 0, i8 0>
}
; Test a doubleword-granularity replicate with a wrap-around mask.
; Each group of eight bytes is 0x80003fffffffffff: bits 18 through 63 plus
; bit 0 (the most significant bit), i.e. a range that starts at 18 and wraps
; round to end at 0.
define <16 x i8> @f10() {
; CHECK-LABEL: f10:
; CHECK: vgmg %v24, 18, 0
; CHECK: br %r14
ret <16 x i8> <i8 128, i8 0, i8 63, i8 255,
i8 255, i8 255, i8 255, i8 255,
i8 128, i8 0, i8 63, i8 255,
i8 255, i8 255, i8 255, i8 255>
}
; Retest f1 with arbitrary undefs instead of 0s.
; The undef bytes sit at different positions in different words, but only in
; positions that are 0 in f1; the 128 bytes are all still defined. The
; expected code is the same as for f1.
define <16 x i8> @f11() {
; CHECK-LABEL: f11:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 undef, i8 128, i8 0,
i8 0, i8 0, i8 128, i8 undef,
i8 undef, i8 0, i8 128, i8 0,
i8 undef, i8 undef, i8 128, i8 0>
}
; Try a case where we want consistent undefs to be treated as 0.
; The first byte of every word is undef and the other three are 0x01, 0xff,
; 0x00. The expected range 15..23 is the word 0x0001ff00, which needs the
; undef byte to act as 0x00.
define <16 x i8> @f12() {
; CHECK-LABEL: f12:
; CHECK: vgmf %v24, 15, 23
; CHECK: br %r14
ret <16 x i8> <i8 undef, i8 1, i8 255, i8 0,
i8 undef, i8 1, i8 255, i8 0,
i8 undef, i8 1, i8 255, i8 0,
i8 undef, i8 1, i8 255, i8 0>
}
; ...and again with the lower bits of the replicated constant.
; The last byte of every word is undef and the first three are 0x00, 0x01,
; 0xfe. The expected range 15..22 is the word 0x0001fe00, which needs the
; undef byte to act as 0x00.
define <16 x i8> @f13() {
; CHECK-LABEL: f13:
; CHECK: vgmf %v24, 15, 22
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 1, i8 254, i8 undef,
i8 0, i8 1, i8 254, i8 undef,
i8 0, i8 1, i8 254, i8 undef,
i8 0, i8 1, i8 254, i8 undef>
}
; Try a case where we want consistent undefs to be treated as -1.
; The first byte of every word is undef and the other three are 0x80, 0x00,
; 0x0f. The expected wrap-around range 28..8 is the word 0xff80000f, which
; needs the undef byte to act as 0xff.
define <16 x i8> @f14() {
; CHECK-LABEL: f14:
; CHECK: vgmf %v24, 28, 8
; CHECK: br %r14
ret <16 x i8> <i8 undef, i8 128, i8 0, i8 15,
i8 undef, i8 128, i8 0, i8 15,
i8 undef, i8 128, i8 0, i8 15,
i8 undef, i8 128, i8 0, i8 15>
}
; ...and again with the lower bits of the replicated constant.
; The last byte of every word is undef and the first three are 0xf0, 0x00,
; 0x3f. The expected wrap-around range 18..3 is the word 0xf0003fff, which
; needs the undef byte to act as 0xff.
define <16 x i8> @f15() {
; CHECK-LABEL: f15:
; CHECK: vgmf %v24, 18, 3
; CHECK: br %r14
ret <16 x i8> <i8 240, i8 0, i8 63, i8 undef,
i8 240, i8 0, i8 63, i8 undef,
i8 240, i8 0, i8 63, i8 undef,
i8 240, i8 0, i8 63, i8 undef>
}
; Repeat f9 with arbitrary undefs.
; The two doublewords have undefs in different byte positions, and every byte
; position is defined in at least one of them. The defined bytes agree with
; the f9 pattern 0x00000001ffe00000.
define <16 x i8> @f16() {
; CHECK-LABEL: f16:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <16 x i8> <i8 undef, i8 0, i8 undef, i8 1,
i8 255, i8 undef, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 1,
i8 undef, i8 224, i8 undef, i8 undef>
}
; Try a case where we want some consistent undefs to be treated as 0
; and some to be treated as 255.
; Bytes 1, 3 and 5 of each doubleword are undef. The expected range 23..35 is
; 0x000001fff0000000, which needs byte 1 and byte 5 to act as 0x00 and byte 3
; to act as 0xff.
define <16 x i8> @f17() {
; CHECK-LABEL: f17:
; CHECK: vgmg %v24, 23, 35
; CHECK: br %r14
ret <16 x i8> <i8 0, i8 undef, i8 1, i8 undef,
i8 240, i8 undef, i8 0, i8 0,
i8 0, i8 undef, i8 1, i8 undef,
i8 240, i8 undef, i8 0, i8 0>
}

View File

@ -0,0 +1,113 @@
; Test vector replicates that use VECTOR GENERATE MASK, v8i16 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a word-granularity replicate with the lowest value that cannot use
; VREPIF.
; Each (i16 0, i16 32768) pair is the word 0x00008000: only bit 16 is set
; when bit 0 is the most significant bit.
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
i16 0, i16 32768, i16 0, i16 32768>
}
; Test a word-granularity replicate that has the lower 17 bits set.
; Each (i16 1, i16 -1) pair is the word 0x0001ffff: bits 15 through 31 when
; bit 0 is the most significant bit.
define <8 x i16> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgmf %v24, 15, 31
; CHECK: br %r14
ret <8 x i16> <i16 1, i16 -1, i16 1, i16 -1,
i16 1, i16 -1, i16 1, i16 -1>
}
; Test a word-granularity replicate that has the upper 15 bits set.
; Each (i16 -2, i16 0) pair is the word 0xfffe0000: bits 0 through 14 when
; bit 0 is the most significant bit.
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgmf %v24, 0, 14
; CHECK: br %r14
ret <8 x i16> <i16 -2, i16 0, i16 -2, i16 0,
i16 -2, i16 0, i16 -2, i16 0>
}
; Test a word-granularity replicate that has middle bits set.
; Each (i16 15, i16 49152) pair is the word 0x000fc000: bits 12 through 17
; when bit 0 is the most significant bit.
define <8 x i16> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgmf %v24, 12, 17
; CHECK: br %r14
ret <8 x i16> <i16 15, i16 49152, i16 15, i16 49152,
i16 15, i16 49152, i16 15, i16 49152>
}
; Test a word-granularity replicate with a wrap-around mask.
; Each (i16 -1, i16 32767) pair is the word 0xffff7fff: every bit except
; bit 16 (bit 0 being the most significant), i.e. a range that starts at 17
; and wraps round to end at 15.
define <8 x i16> @f5() {
; CHECK-LABEL: f5:
; CHECK: vgmf %v24, 17, 15
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 32767, i16 -1, i16 32767,
i16 -1, i16 32767, i16 -1, i16 32767>
}
; Test a doubleword-granularity replicate with the lowest value that cannot
; use VREPIG.
; Each group of four halfwords (0, 0, 0, 32768) is 0x0000000000008000: only
; bit 48 is set when bit 0 is the most significant bit.
define <8 x i16> @f6() {
; CHECK-LABEL: f6:
; CHECK: vgmg %v24, 48, 48
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
i16 0, i16 0, i16 0, i16 32768>
}
; Test a doubleword-granularity replicate that has the lower 22 bits set.
; Each group of four halfwords (0, 0, 63, -1) is 0x00000000003fffff: bits 42
; through 63 when bit 0 is the most significant bit.
define <8 x i16> @f7() {
; CHECK-LABEL: f7:
; CHECK: vgmg %v24, 42, 63
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 0, i16 63, i16 -1,
i16 0, i16 0, i16 63, i16 -1>
}
; Test a doubleword-granularity replicate that has the upper 45 bits set.
; Each group of four halfwords (-1, -1, -8, 0) is 0xfffffffffff80000: bits 0
; through 44 when bit 0 is the most significant bit.
define <8 x i16> @f8() {
; CHECK-LABEL: f8:
; CHECK: vgmg %v24, 0, 44
; CHECK: br %r14
ret <8 x i16> <i16 -1, i16 -1, i16 -8, i16 0,
i16 -1, i16 -1, i16 -8, i16 0>
}
; Test a doubleword-granularity replicate that has middle bits set.
; Each group of four halfwords (0, 1, -32, 0) is 0x00000001ffe00000: bits 31
; through 42 when bit 0 is the most significant bit.
define <8 x i16> @f9() {
; CHECK-LABEL: f9:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <8 x i16> <i16 0, i16 1, i16 -32, i16 0,
i16 0, i16 1, i16 -32, i16 0>
}
; Test a doubleword-granularity replicate with a wrap-around mask.
; Each group of four halfwords (32768, 16383, -1, -1) is 0x80003fffffffffff:
; bits 18 through 63 plus bit 0 (the most significant bit), i.e. a range that
; starts at 18 and wraps round to end at 0.
define <8 x i16> @f10() {
; CHECK-LABEL: f10:
; CHECK: vgmg %v24, 18, 0
; CHECK: br %r14
ret <8 x i16> <i16 32768, i16 16383, i16 -1, i16 -1,
i16 32768, i16 16383, i16 -1, i16 -1>
}
; Retest f1 with arbitrary undefs instead of 0s.
; Elements 0 and 6, which are 0 in f1, are undef here; the expected code is
; the same as for f1.
define <8 x i16> @f11() {
; CHECK-LABEL: f11:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <8 x i16> <i16 undef, i16 32768, i16 0, i16 32768,
i16 0, i16 32768, i16 undef, i16 32768>
}
; ...likewise f9.
; Elements 0 and 7, which are 0 in f9, are undef here; the expected code is
; the same as for f9.
define <8 x i16> @f12() {
; CHECK-LABEL: f12:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <8 x i16> <i16 undef, i16 1, i16 -32, i16 0,
i16 0, i16 1, i16 -32, i16 undef>
}

View File

@ -0,0 +1,85 @@
; Test vector replicates that use VECTOR GENERATE MASK, v4i32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a word-granularity replicate with the lowest value that cannot use
; VREPIF.
; 32768 is 0x00008000: only bit 16 is set when bit 0 is the most significant
; bit of the word.
define <4 x i32> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
}
; Test a word-granularity replicate that has the lower 17 bits set.
; 131071 is 0x0001ffff: bits 15 through 31 when bit 0 is the most significant
; bit.
define <4 x i32> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgmf %v24, 15, 31
; CHECK: br %r14
ret <4 x i32> <i32 131071, i32 131071, i32 131071, i32 131071>
}
; Test a word-granularity replicate that has the upper 15 bits set.
; -131072 is 0xfffe0000: bits 0 through 14 when bit 0 is the most significant
; bit.
define <4 x i32> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgmf %v24, 0, 14
; CHECK: br %r14
ret <4 x i32> <i32 -131072, i32 -131072, i32 -131072, i32 -131072>
}
; Test a word-granularity replicate that has middle bits set.
; 1032192 is 0x000fc000: bits 12 through 17 when bit 0 is the most
; significant bit.
define <4 x i32> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgmf %v24, 12, 17
; CHECK: br %r14
ret <4 x i32> <i32 1032192, i32 1032192, i32 1032192, i32 1032192>
}
; Test a word-granularity replicate with a wrap-around mask.
; -32769 is 0xffff7fff: every bit except bit 16 (bit 0 being the most
; significant), i.e. a range that starts at 17 and wraps round to end at 15.
define <4 x i32> @f5() {
; CHECK-LABEL: f5:
; CHECK: vgmf %v24, 17, 15
; CHECK: br %r14
ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
}
; Test a doubleword-granularity replicate with the lowest value that cannot
; use VREPIG.
; Each (i32 0, i32 32768) pair is 0x0000000000008000: only bit 48 is set when
; bit 0 is the most significant bit of the doubleword.
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK: vgmg %v24, 48, 48
; CHECK: br %r14
ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
}
; Test a doubleword-granularity replicate that has the lower 22 bits set.
; Each (i32 0, i32 4194303) pair is 0x00000000003fffff: bits 42 through 63
; when bit 0 is the most significant bit.
define <4 x i32> @f7() {
; CHECK-LABEL: f7:
; CHECK: vgmg %v24, 42, 63
; CHECK: br %r14
ret <4 x i32> <i32 0, i32 4194303, i32 0, i32 4194303>
}
; Test a doubleword-granularity replicate that has the upper 45 bits set.
; Each (i32 -1, i32 -524288) pair is 0xfffffffffff80000: bits 0 through 44
; when bit 0 is the most significant bit.
define <4 x i32> @f8() {
; CHECK-LABEL: f8:
; CHECK: vgmg %v24, 0, 44
; CHECK: br %r14
ret <4 x i32> <i32 -1, i32 -524288, i32 -1, i32 -524288>
}
; Test a doubleword-granularity replicate that has middle bits set.
; Each (i32 1, i32 -2097152) pair is 0x00000001ffe00000: bits 31 through 42
; when bit 0 is the most significant bit.
define <4 x i32> @f9() {
; CHECK-LABEL: f9:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <4 x i32> <i32 1, i32 -2097152, i32 1, i32 -2097152>
}
; Test a doubleword-granularity replicate with a wrap-around mask.
; Each (i32 -2147467265, i32 -1) pair is 0x80003fffffffffff: bits 18 through
; 63 plus bit 0 (the most significant bit), i.e. a range that starts at 18 and
; wraps round to end at 0.
define <4 x i32> @f10() {
; CHECK-LABEL: f10:
; CHECK: vgmg %v24, 18, 0
; CHECK: br %r14
ret <4 x i32> <i32 -2147467265, i32 -1, i32 -2147467265, i32 -1>
}

View File

@ -0,0 +1,85 @@
; Test vector replicates that use VECTOR GENERATE MASK, v2i64 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a word-granularity replicate with the lowest value that cannot use
; VREPIF.
; 140737488388096 is 0x0000800000008000: the word 0x00008000 in both halves,
; which has only bit 16 set when bit 0 is the most significant bit.
define <2 x i64> @f1() {
; CHECK-LABEL: f1:
; CHECK: vgmf %v24, 16, 16
; CHECK: br %r14
ret <2 x i64> <i64 140737488388096, i64 140737488388096>
}
; Test a word-granularity replicate that has the lower 17 bits set.
; 562945658585087 is 0x0001ffff0001ffff: the word 0x0001ffff (bits 15 through
; 31, bit 0 being the most significant) in both halves.
define <2 x i64> @f2() {
; CHECK-LABEL: f2:
; CHECK: vgmf %v24, 15, 31
; CHECK: br %r14
ret <2 x i64> <i64 562945658585087, i64 562945658585087>
}
; Test a word-granularity replicate that has the upper 15 bits set.
; -562945658585088 is 0xfffe0000fffe0000: the word 0xfffe0000 (bits 0 through
; 14, bit 0 being the most significant) in both halves.
define <2 x i64> @f3() {
; CHECK-LABEL: f3:
; CHECK: vgmf %v24, 0, 14
; CHECK: br %r14
ret <2 x i64> <i64 -562945658585088, i64 -562945658585088>
}
; Test a word-granularity replicate that has middle bits set.
; 4433230884225024 is 0x000fc000000fc000: the word 0x000fc000 (bits 12
; through 17, bit 0 being the most significant) in both halves.
define <2 x i64> @f4() {
; CHECK-LABEL: f4:
; CHECK: vgmf %v24, 12, 17
; CHECK: br %r14
ret <2 x i64> <i64 4433230884225024, i64 4433230884225024>
}
; Test a word-granularity replicate with a wrap-around mask.
; -140737488388097 is 0xffff7fffffff7fff: the word 0xffff7fff in both halves,
; i.e. every bit except bit 16 (bit 0 being the most significant), a range
; that starts at 17 and wraps round to end at 15.
define <2 x i64> @f5() {
; CHECK-LABEL: f5:
; CHECK: vgmf %v24, 17, 15
; CHECK: br %r14
ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
}
; Test a doubleword-granularity replicate with the lowest value that cannot
; use VREPIG.
; 32768 is 0x0000000000008000: only bit 48 is set when bit 0 is the most
; significant bit of the doubleword.
define <2 x i64> @f6() {
; CHECK-LABEL: f6:
; CHECK: vgmg %v24, 48, 48
; CHECK: br %r14
ret <2 x i64> <i64 32768, i64 32768>
}
; Test a doubleword-granularity replicate that has the lower 22 bits set.
; 4194303 is 0x00000000003fffff: bits 42 through 63 when bit 0 is the most
; significant bit.
define <2 x i64> @f7() {
; CHECK-LABEL: f7:
; CHECK: vgmg %v24, 42, 63
; CHECK: br %r14
ret <2 x i64> <i64 4194303, i64 4194303>
}
; Test a doubleword-granularity replicate that has the upper 45 bits set.
; -524288 is 0xfffffffffff80000: bits 0 through 44 when bit 0 is the most
; significant bit.
define <2 x i64> @f8() {
; CHECK-LABEL: f8:
; CHECK: vgmg %v24, 0, 44
; CHECK: br %r14
ret <2 x i64> <i64 -524288, i64 -524288>
}
; Test a doubleword-granularity replicate that has middle bits set.
; 8587837440 is 0x00000001ffe00000: bits 31 through 42 when bit 0 is the most
; significant bit.
define <2 x i64> @f9() {
; CHECK-LABEL: f9:
; CHECK: vgmg %v24, 31, 42
; CHECK: br %r14
ret <2 x i64> <i64 8587837440, i64 8587837440>
}
; Test a doubleword-granularity replicate with a wrap-around mask.
; -9223301668110598145 is 0x80003fffffffffff: bits 18 through 63 plus bit 0
; (the most significant bit), i.e. a range that starts at 18 and wraps round
; to end at 0.
define <2 x i64> @f10() {
; CHECK-LABEL: f10:
; CHECK: vgmg %v24, 18, 0
; CHECK: br %r14
ret <2 x i64> <i64 -9223301668110598145, i64 -9223301668110598145>
}

View File

@ -0,0 +1,81 @@
; Test vector count leading zeros
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Declarations of the ctlz intrinsic for the four integer vector types tested
; below. The second operand is the is_zero_undef flag.
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
; v16i8 count-leading-zeros with is_zero_undef = false.
; A single vclzb on the argument register is expected.
define <16 x i8> @f1(<16 x i8> %a) {
; CHECK-LABEL: f1:
; CHECK: vclzb %v24, %v24
; CHECK: br %r14
  %lz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
  ret <16 x i8> %lz
}
; v16i8 count-leading-zeros with is_zero_undef = true.
; The same single vclzb as in the is_zero_undef = false case is expected.
define <16 x i8> @f2(<16 x i8> %a) {
; CHECK-LABEL: f2:
; CHECK: vclzb %v24, %v24
; CHECK: br %r14
  %lz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
  ret <16 x i8> %lz
}
; v8i16 count-leading-zeros with is_zero_undef = false.
; A single vclzh on the argument register is expected.
define <8 x i16> @f3(<8 x i16> %a) {
; CHECK-LABEL: f3:
; CHECK: vclzh %v24, %v24
; CHECK: br %r14
  %lz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
  ret <8 x i16> %lz
}
; v8i16 count-leading-zeros with is_zero_undef = true.
; The same single vclzh as in the is_zero_undef = false case is expected.
define <8 x i16> @f4(<8 x i16> %a) {
; CHECK-LABEL: f4:
; CHECK: vclzh %v24, %v24
; CHECK: br %r14
  %lz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
  ret <8 x i16> %lz
}
; v4i32 count-leading-zeros with is_zero_undef = false.
; A single vclzf on the argument register is expected.
define <4 x i32> @f5(<4 x i32> %a) {
; CHECK-LABEL: f5:
; CHECK: vclzf %v24, %v24
; CHECK: br %r14
  %lz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
  ret <4 x i32> %lz
}
; v4i32 count-leading-zeros with is_zero_undef = true.
; The same single vclzf as in the is_zero_undef = false case is expected.
define <4 x i32> @f6(<4 x i32> %a) {
; CHECK-LABEL: f6:
; CHECK: vclzf %v24, %v24
; CHECK: br %r14
  %lz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
  ret <4 x i32> %lz
}
; v2i64 count-leading-zeros with is_zero_undef = false.
; A single vclzg on the argument register is expected.
define <2 x i64> @f7(<2 x i64> %a) {
; CHECK-LABEL: f7:
; CHECK: vclzg %v24, %v24
; CHECK: br %r14
  %lz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
  ret <2 x i64> %lz
}
; v2i64 count-leading-zeros with is_zero_undef = true.
; The same single vclzg as in the is_zero_undef = false case is expected.
define <2 x i64> @f8(<2 x i64> %a) {
; CHECK-LABEL: f8:
; CHECK: vclzg %v24, %v24
; CHECK: br %r14
  %lz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
  ret <2 x i64> %lz
}

View File

@ -0,0 +1,53 @@
; Test vector population-count instruction
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Declarations of the ctpop intrinsic for the four integer vector types tested
; below.
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; v16i8 population count: a single vpopct straight into the return register
; is expected.
define <16 x i8> @f1(<16 x i8> %a) {
; CHECK-LABEL: f1:
; CHECK: vpopct %v24, %v24, 0
; CHECK: br %r14
  %count = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  ret <16 x i8> %count
}
; v8i16 population count. The expected sequence is vpopct, then veslh by 8 of
; that result, vah of the two, and vesrlh by 8 into the return register
; (presumably folding the two per-byte counts of each halfword together --
; confirm against the instruction definitions).
define <8 x i16> @f2(<8 x i16> %a) {
; CHECK-LABEL: f2:
; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
; CHECK: veslh [[T2:%v[0-9]+]], [[T1]], 8
; CHECK: vah [[T3:%v[0-9]+]], [[T1]], [[T2]]
; CHECK: vesrlh %v24, [[T3]], 8
; CHECK: br %r14
  %count = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  ret <8 x i16> %count
}
; v4i32 population count. The expected sequence is vpopct, a vgbm with
; immediate 0, and a vsumb of the two into the return register.
define <4 x i32> @f3(<4 x i32> %a) {
; CHECK-LABEL: f3:
; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
; CHECK: vgbm [[T2:%v[0-9]+]], 0
; CHECK: vsumb %v24, [[T1]], [[T2]]
; CHECK: br %r14
  %count = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  ret <4 x i32> %count
}
; v2i64 population count. The expected sequence is the same vpopct / vgbm 0 /
; vsumb steps as the v4i32 case, followed by a vsumgf that reuses the vgbm
; result and writes the return register.
define <2 x i64> @f4(<2 x i64> %a) {
; CHECK-LABEL: f4:
; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
; CHECK: vgbm [[T2:%v[0-9]+]], 0
; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]]
; CHECK: vsumgf %v24, [[T3]], [[T2]]
; CHECK: br %r14
  %count = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  ret <2 x i64> %count
}

View File

@ -0,0 +1,81 @@
; Test vector count trailing zeros
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Declarations of the cttz intrinsic for the four integer vector types tested
; below. The second operand is the is_zero_undef flag.
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
; v16i8 count-trailing-zeros with is_zero_undef = false.
; A single vctzb on the argument register is expected.
define <16 x i8> @f1(<16 x i8> %a) {
; CHECK-LABEL: f1:
; CHECK: vctzb %v24, %v24
; CHECK: br %r14
  %tz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  ret <16 x i8> %tz
}
; v16i8 count-trailing-zeros with is_zero_undef = true.
; The same single vctzb as in the is_zero_undef = false case is expected.
define <16 x i8> @f2(<16 x i8> %a) {
; CHECK-LABEL: f2:
; CHECK: vctzb %v24, %v24
; CHECK: br %r14
  %tz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  ret <16 x i8> %tz
}
; v8i16 count-trailing-zeros with is_zero_undef = false.
; A single vctzh on the argument register is expected.
define <8 x i16> @f3(<8 x i16> %a) {
; CHECK-LABEL: f3:
; CHECK: vctzh %v24, %v24
; CHECK: br %r14
  %tz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  ret <8 x i16> %tz
}
; v8i16 count-trailing-zeros with is_zero_undef = true.
; The same single vctzh as in the is_zero_undef = false case is expected.
define <8 x i16> @f4(<8 x i16> %a) {
; CHECK-LABEL: f4:
; CHECK: vctzh %v24, %v24
; CHECK: br %r14
  %tz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  ret <8 x i16> %tz
}
; v4i32 count-trailing-zeros with is_zero_undef = false.
; A single vctzf on the argument register is expected.
define <4 x i32> @f5(<4 x i32> %a) {
; CHECK-LABEL: f5:
; CHECK: vctzf %v24, %v24
; CHECK: br %r14
  %tz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  ret <4 x i32> %tz
}
; v4i32 count-trailing-zeros with is_zero_undef = true.
; The same single vctzf as in the is_zero_undef = false case is expected.
define <4 x i32> @f6(<4 x i32> %a) {
; CHECK-LABEL: f6:
; CHECK: vctzf %v24, %v24
; CHECK: br %r14
  %tz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  ret <4 x i32> %tz
}
; v2i64 count-trailing-zeros with is_zero_undef = false.
; A single vctzg on the argument register is expected.
define <2 x i64> @f7(<2 x i64> %a) {
; CHECK-LABEL: f7:
; CHECK: vctzg %v24, %v24
; CHECK: br %r14
  %tz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  ret <2 x i64> %tz
}
; v2i64 count-trailing-zeros with is_zero_undef = true.
; The same single vctzg as in the is_zero_undef = false case is expected.
define <2 x i64> @f8(<2 x i64> %a) {
; CHECK-LABEL: f8:
; CHECK: vctzg %v24, %v24
; CHECK: br %r14
  %tz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  ret <2 x i64> %tz
}

View File

@ -0,0 +1,62 @@
; Test vector division. There is no native support for this, so it's really
; a test of the operation legalization code.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Signed division of two v16i8 operands, which follow an unused %dummy
; argument. The expected code begins with a vlvgp and then inserts bytes with
; vlvgb in any order. Byte indices 7 and 15 are not in the list (presumably
; they are the values placed by vlvgp -- confirm).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vlvgp [[REG:%v[0-9]+]],
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 0
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 1
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 2
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 3
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 4
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 5
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 6
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 8
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 9
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 10
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 11
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 12
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 13
; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 14
; CHECK: br %r14
  %quot = sdiv <16 x i8> %val1, %val2
  ret <16 x i8> %quot
}
; Signed division of two v8i16 operands, which follow an unused %dummy
; argument. The expected code begins with a vlvgp and then inserts halfwords
; with vlvgh in any order. Element indices 3 and 7 are not in the list
; (presumably they are the values placed by vlvgp -- confirm).
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vlvgp [[REG:%v[0-9]+]],
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 0
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 1
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 2
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 4
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 5
; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 6
; CHECK: br %r14
  %quot = sdiv <8 x i16> %val1, %val2
  ret <8 x i16> %quot
}
; Signed division of two v4i32 operands, which follow an unused %dummy
; argument. The expected code begins with a vlvgp and then inserts words 0 and
; 2 with vlvgf in either order. Words 1 and 3 are not in the list (presumably
; they are the values placed by vlvgp -- confirm).
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vlvgp [[REG:%v[0-9]+]],
; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 0
; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 2
; CHECK: br %r14
  %quot = sdiv <4 x i32> %val1, %val2
  ret <4 x i32> %quot
}
; Signed division of two v2i64 operands, which follow an unused %dummy
; argument. Only a vlvgp that writes the return register directly is expected;
; no further element inserts are checked.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vlvgp %v24,
; CHECK: br %r14
  %quot = sdiv <2 x i64> %val1, %val2
  ret <2 x i64> %quot
}

View File

@ -0,0 +1,83 @@
; Test v16i8 maximum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Signed v16i8 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxb is expected, with the two sources in either order.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp slt <16 x i8> %val1, %val2
  %max = select <16 x i1> %lt, <16 x i8> %val2, <16 x i8> %val1
  ret <16 x i8> %max
}
; Signed v16i8 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxb is expected, with the two sources in either order.
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp sle <16 x i8> %val1, %val2
  %max = select <16 x i1> %le, <16 x i8> %val2, <16 x i8> %val1
  ret <16 x i8> %max
}
; Signed v16i8 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxb is expected, with the two sources in either order.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp sgt <16 x i8> %val1, %val2
  %max = select <16 x i1> %gt, <16 x i8> %val1, <16 x i8> %val2
  ret <16 x i8> %max
}
; Signed v16i8 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxb is expected, with the two sources in either order.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp sge <16 x i8> %val1, %val2
  %max = select <16 x i1> %ge, <16 x i8> %val1, <16 x i8> %val2
  ret <16 x i8> %max
}
; Unsigned v16i8 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxlb is expected, with the two sources in either order.
define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp ult <16 x i8> %val1, %val2
  %max = select <16 x i1> %lt, <16 x i8> %val2, <16 x i8> %val1
  ret <16 x i8> %max
}
; Unsigned v16i8 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxlb is expected, with the two sources in either order.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp ule <16 x i8> %val1, %val2
  %max = select <16 x i1> %le, <16 x i8> %val2, <16 x i8> %val1
  ret <16 x i8> %max
}
; Unsigned v16i8 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxlb is expected, with the two sources in either order.
define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp ugt <16 x i8> %val1, %val2
  %max = select <16 x i1> %gt, <16 x i8> %val1, <16 x i8> %val2
  ret <16 x i8> %max
}
; Unsigned v16i8 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxlb is expected, with the two sources in either order.
define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp uge <16 x i8> %val1, %val2
  %max = select <16 x i1> %ge, <16 x i8> %val1, <16 x i8> %val2
  ret <16 x i8> %max
}

View File

@ -0,0 +1,83 @@
; Test v8i16 maximum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Signed v8i16 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxh is expected, with the two sources in either order.
define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp slt <8 x i16> %val1, %val2
  %max = select <8 x i1> %lt, <8 x i16> %val2, <8 x i16> %val1
  ret <8 x i16> %max
}
; Signed v8i16 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxh is expected, with the two sources in either order.
define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp sle <8 x i16> %val1, %val2
  %max = select <8 x i1> %le, <8 x i16> %val2, <8 x i16> %val1
  ret <8 x i16> %max
}
; Signed v8i16 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxh is expected, with the two sources in either order.
define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp sgt <8 x i16> %val1, %val2
  %max = select <8 x i1> %gt, <8 x i16> %val1, <8 x i16> %val2
  ret <8 x i16> %max
}
; Signed v8i16 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxh is expected, with the two sources in either order.
define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp sge <8 x i16> %val1, %val2
  %max = select <8 x i1> %ge, <8 x i16> %val1, <8 x i16> %val2
  ret <8 x i16> %max
}
; Unsigned v8i16 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxlh is expected, with the two sources in either order.
define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp ult <8 x i16> %val1, %val2
  %max = select <8 x i1> %lt, <8 x i16> %val2, <8 x i16> %val1
  ret <8 x i16> %max
}
; Unsigned v8i16 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxlh is expected, with the two sources in either order.
define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp ule <8 x i16> %val1, %val2
  %max = select <8 x i1> %le, <8 x i16> %val2, <8 x i16> %val1
  ret <8 x i16> %max
}
; Unsigned v8i16 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxlh is expected, with the two sources in either order.
define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp ugt <8 x i16> %val1, %val2
  %max = select <8 x i1> %gt, <8 x i16> %val1, <8 x i16> %val2
  ret <8 x i16> %max
}
; Unsigned v8i16 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxlh is expected, with the two sources in either order.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp uge <8 x i16> %val1, %val2
  %max = select <8 x i1> %ge, <8 x i16> %val1, <8 x i16> %val2
  ret <8 x i16> %max
}

View File

@ -0,0 +1,83 @@
; Test v4i32 maximum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Signed v4i32 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxf is expected, with the two sources in either order.
define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp slt <4 x i32> %val1, %val2
  %max = select <4 x i1> %lt, <4 x i32> %val2, <4 x i32> %val1
  ret <4 x i32> %max
}
; Signed v4i32 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxf is expected, with the two sources in either order.
define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp sle <4 x i32> %val1, %val2
  %max = select <4 x i1> %le, <4 x i32> %val2, <4 x i32> %val1
  ret <4 x i32> %max
}
; Signed v4i32 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxf is expected, with the two sources in either order.
define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp sgt <4 x i32> %val1, %val2
  %max = select <4 x i1> %gt, <4 x i32> %val1, <4 x i32> %val2
  ret <4 x i32> %max
}
; Signed v4i32 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxf is expected, with the two sources in either order.
define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp sge <4 x i32> %val1, %val2
  %max = select <4 x i1> %ge, <4 x i32> %val1, <4 x i32> %val2
  ret <4 x i32> %max
}
; Unsigned v4i32 maximum written as (val1 < val2) ? val2 : val1.
; A single vmxlf is expected, with the two sources in either order.
define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %lt = icmp ult <4 x i32> %val1, %val2
  %max = select <4 x i1> %lt, <4 x i32> %val2, <4 x i32> %val1
  ret <4 x i32> %max
}
; Unsigned v4i32 maximum written as (val1 <= val2) ? val2 : val1.
; A single vmxlf is expected, with the two sources in either order.
define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %le = icmp ule <4 x i32> %val1, %val2
  %max = select <4 x i1> %le, <4 x i32> %val2, <4 x i32> %val1
  ret <4 x i32> %max
}
; Unsigned v4i32 maximum written as (val1 > val2) ? val1 : val2.
; A single vmxlf is expected, with the two sources in either order.
define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %gt = icmp ugt <4 x i32> %val1, %val2
  %max = select <4 x i1> %gt, <4 x i32> %val1, <4 x i32> %val2
  ret <4 x i32> %max
}
; Unsigned v4i32 maximum written as (val1 >= val2) ? val1 : val2.
; A single vmxlf is expected, with the two sources in either order.
define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
  %ge = icmp uge <4 x i32> %val1, %val2
  %max = select <4 x i1> %ge, <4 x i32> %val1, <4 x i32> %val2
  ret <4 x i32> %max
}

View File

@ -0,0 +1,83 @@
; Test v2i64 maximum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt: each lane takes %val2 when %val1 < %val2 (signed) and %val1
; otherwise, i.e. the signed maximum. Either source operand order is accepted.
define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp slt <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with sle: each lane takes %val2 when %val1 <= %val2 (signed) and %val1
; otherwise, i.e. the signed maximum. Either source operand order is accepted.
define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sle <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with sgt: each lane takes %val1 when %val1 > %val2 (signed) and %val2
; otherwise, i.e. the signed maximum. Either source operand order is accepted.
define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sgt <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with sge: each lane takes %val1 when %val1 >= %val2 (signed) and %val2
; otherwise, i.e. the signed maximum. Either source operand order is accepted.
define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sge <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with ult: each lane takes %val2 when %val1 < %val2 (unsigned) and %val1
; otherwise, i.e. the unsigned maximum, for which vmxlg is expected.
define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ult <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with ule: each lane takes %val2 when %val1 <= %val2 (unsigned) and %val1
; otherwise, i.e. the unsigned maximum, for which vmxlg is expected.
define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ule <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with ugt: each lane takes %val1 when %val1 > %val2 (unsigned) and %val2
; otherwise, i.e. the unsigned maximum, for which vmxlg is expected.
define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ugt <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with uge: each lane takes %val1 when %val1 >= %val2 (unsigned) and %val2
; otherwise, i.e. the unsigned maximum, for which vmxlg is expected.
define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp uge <2 x i64> %val1, %val2
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,83 @@
; Test v16i8 minimum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt: each lane takes %val2 when %val2 < %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp slt <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
ret <16 x i8> %ret
}
; Test with sle: each lane takes %val2 when %val2 <= %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sle <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
ret <16 x i8> %ret
}
; Test with sgt: each lane takes %val1 when %val2 > %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sgt <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
ret <16 x i8> %ret
}
; Test with sge: each lane takes %val1 when %val2 >= %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sge <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
ret <16 x i8> %ret
}
; Test with ult: each lane takes %val2 when %val2 < %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlb is expected.
define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ult <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
ret <16 x i8> %ret
}
; Test with ule: each lane takes %val2 when %val2 <= %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlb is expected.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ule <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
ret <16 x i8> %ret
}
; Test with ugt: each lane takes %val1 when %val2 > %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlb is expected.
define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ugt <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
ret <16 x i8> %ret
}
; Test with uge: each lane takes %val1 when %val2 >= %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlb is expected.
define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp uge <16 x i8> %val2, %val1
%ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
ret <16 x i8> %ret
}

View File

@ -0,0 +1,83 @@
; Test v8i16 minimum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt: each lane takes %val2 when %val2 < %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp slt <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
ret <8 x i16> %ret
}
; Test with sle: each lane takes %val2 when %val2 <= %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sle <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
ret <8 x i16> %ret
}
; Test with sgt: each lane takes %val1 when %val2 > %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sgt <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
ret <8 x i16> %ret
}
; Test with sge: each lane takes %val1 when %val2 >= %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sge <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
ret <8 x i16> %ret
}
; Test with ult: each lane takes %val2 when %val2 < %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlh is expected.
define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ult <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
ret <8 x i16> %ret
}
; Test with ule: each lane takes %val2 when %val2 <= %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlh is expected.
define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ule <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
ret <8 x i16> %ret
}
; Test with ugt: each lane takes %val1 when %val2 > %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlh is expected.
define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ugt <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
ret <8 x i16> %ret
}
; Test with uge: each lane takes %val1 when %val2 >= %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlh is expected.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp uge <8 x i16> %val2, %val1
%ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
ret <8 x i16> %ret
}

View File

@ -0,0 +1,83 @@
; Test v4i32 minimum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt: each lane takes %val2 when %val2 < %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp slt <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
ret <4 x i32> %ret
}
; Test with sle: each lane takes %val2 when %val2 <= %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sle <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
ret <4 x i32> %ret
}
; Test with sgt: each lane takes %val1 when %val2 > %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sgt <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
ret <4 x i32> %ret
}
; Test with sge: each lane takes %val1 when %val2 >= %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sge <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
ret <4 x i32> %ret
}
; Test with ult: each lane takes %val2 when %val2 < %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlf is expected.
define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ult <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
ret <4 x i32> %ret
}
; Test with ule: each lane takes %val2 when %val2 <= %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlf is expected.
define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ule <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
ret <4 x i32> %ret
}
; Test with ugt: each lane takes %val1 when %val2 > %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlf is expected.
define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ugt <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
ret <4 x i32> %ret
}
; Test with uge: each lane takes %val1 when %val2 >= %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlf is expected.
define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp uge <4 x i32> %val2, %val1
%ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
ret <4 x i32> %ret
}

View File

@ -0,0 +1,83 @@
; Test v2i64 minimum.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test with slt: each lane takes %val2 when %val2 < %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp slt <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with sle: each lane takes %val2 when %val2 <= %val1 (signed) and %val1
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sle <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with sgt: each lane takes %val1 when %val2 > %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sgt <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with sge: each lane takes %val1 when %val2 >= %val1 (signed) and %val2
; otherwise, i.e. the signed minimum. Either source operand order is accepted.
define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp sge <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with ult: each lane takes %val2 when %val2 < %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlg is expected.
define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f5:
; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ult <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with ule: each lane takes %val2 when %val2 <= %val1 (unsigned) and %val1
; otherwise, i.e. the unsigned minimum, for which vmnlg is expected.
define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ule <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
ret <2 x i64> %ret
}
; Test with ugt: each lane takes %val1 when %val2 > %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlg is expected.
define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f7:
; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp ugt <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}
; Test with uge: each lane takes %val1 when %val2 >= %val1 (unsigned) and %val2
; otherwise, i.e. the unsigned minimum, for which vmnlg is expected.
define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
; CHECK: br %r14
%cmp = icmp uge <2 x i64> %val2, %val1
%ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,35 @@
; Test vector register moves.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 moves: returning the second argument unchanged is expected to
; need a single vlr from %v26 to %v24.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vlr %v24, %v26
; CHECK: br %r14
ret <16 x i8> %val2
}
; Test v8i16 moves: returning the second argument unchanged is expected to
; need a single vlr from %v26 to %v24.
define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vlr %v24, %v26
; CHECK: br %r14
ret <8 x i16> %val2
}
; Test v4i32 moves: returning the second argument unchanged is expected to
; need a single vlr from %v26 to %v24.
define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vlr %v24, %v26
; CHECK: br %r14
ret <4 x i32> %val2
}
; Test v2i64 moves: returning the second argument unchanged is expected to
; need a single vlr from %v26 to %v24.
define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vlr %v24, %v26
; CHECK: br %r14
ret <2 x i64> %val2
}

View File

@ -0,0 +1,93 @@
; Test vector loads.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 loads: a plain load through the pointer argument is expected to
; become a single vl with displacement 0.
define <16 x i8> @f1(<16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ret = load <16 x i8>, <16 x i8> *%ptr
ret <16 x i8> %ret
}
; Test v8i16 loads: a plain load through the pointer argument is expected to
; become a single vl with displacement 0.
define <8 x i16> @f2(<8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ret = load <8 x i16>, <8 x i16> *%ptr
ret <8 x i16> %ret
}
; Test v4i32 loads: a plain load through the pointer argument is expected to
; become a single vl with displacement 0.
define <4 x i32> @f3(<4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ret = load <4 x i32>, <4 x i32> *%ptr
ret <4 x i32> %ret
}
; Test v2i64 loads: a plain load through the pointer argument is expected to
; become a single vl with displacement 0.
define <2 x i64> @f4(<2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ret = load <2 x i64>, <2 x i64> *%ptr
ret <2 x i64> %ret
}
; Test the highest aligned in-range offset: element 255 of a <16 x i8> array
; is 255 * 16 = 4080 bytes past %base and should fold into the displacement.
define <16 x i8> @f7(<16 x i8> *%base) {
; CHECK-LABEL: f7:
; CHECK: vl %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 255
%ret = load <16 x i8>, <16 x i8> *%ptr
ret <16 x i8> %ret
}
; Test the highest unaligned in-range offset: a byte offset of 4095 with an
; explicitly align-1 load should still fold into the displacement.
define <16 x i8> @f8(i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vl %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <16 x i8> *
%ret = load <16 x i8>, <16 x i8> *%ptr, align 1
ret <16 x i8> %ret
}
; Test the next offset up, which requires separate address logic: element 256
; is 4096 bytes past %base, so the base is expected to be adjusted first.
define <16 x i8> @f9(<16 x i8> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 256
%ret = load <16 x i8>, <16 x i8> *%ptr
ret <16 x i8> %ret
}
; Test negative offsets, which also require separate address logic: element
; -1 is 16 bytes before %base, so the base is expected to be adjusted first.
define <16 x i8> @f10(<16 x i8> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vl %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 -1
%ret = load <16 x i8>, <16 x i8> *%ptr
ret <16 x i8> %ret
}
; Check that indexes are allowed: a base plus a variable byte index should be
; matched as a base+index address on the vl itself.
define <16 x i8> @f11(i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vl %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <16 x i8> *
%ret = load <16 x i8>, <16 x i8> *%ptr, align 1
ret <16 x i8> %ret
}

View File

@ -0,0 +1,93 @@
; Test vector stores.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 stores: a plain store through the pointer argument is expected
; to become a single vst with displacement 0.
define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
store <16 x i8> %val, <16 x i8> *%ptr
ret void
}
; Test v8i16 stores: a plain store through the pointer argument is expected
; to become a single vst with displacement 0.
define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
store <8 x i16> %val, <8 x i16> *%ptr
ret void
}
; Test v4i32 stores: a plain store through the pointer argument is expected
; to become a single vst with displacement 0.
define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
store <4 x i32> %val, <4 x i32> *%ptr
ret void
}
; Test v2i64 stores: a plain store through the pointer argument is expected
; to become a single vst with displacement 0.
define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
store <2 x i64> %val, <2 x i64> *%ptr
ret void
}
; Test the highest aligned in-range offset: element 255 of a <16 x i8> array
; is 255 * 16 = 4080 bytes past %base and should fold into the displacement.
define void @f7(<16 x i8> %val, <16 x i8> *%base) {
; CHECK-LABEL: f7:
; CHECK: vst %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 255
store <16 x i8> %val, <16 x i8> *%ptr
ret void
}
; Test the highest unaligned in-range offset: a byte offset of 4095 with an
; explicitly align-1 store should still fold into the displacement.
define void @f8(<16 x i8> %val, i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vst %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <16 x i8> *
store <16 x i8> %val, <16 x i8> *%ptr, align 1
ret void
}
; Test the next offset up, which requires separate address logic: element 256
; is 4096 bytes past %base, so the base is expected to be adjusted first.
define void @f9(<16 x i8> %val, <16 x i8> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 256
store <16 x i8> %val, <16 x i8> *%ptr
ret void
}
; Test negative offsets, which also require separate address logic: element
; -1 is 16 bytes before %base, so the base is expected to be adjusted first.
define void @f10(<16 x i8> %val, <16 x i8> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vst %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 -1
store <16 x i8> %val, <16 x i8> *%ptr
ret void
}
; Check that indexes are allowed: a base plus a variable byte index should be
; matched as a base+index address on the vst itself.
define void @f11(<16 x i8> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vst %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <16 x i8> *
store <16 x i8> %val, <16 x i8> *%ptr, align 1
ret void
}

View File

@ -0,0 +1,121 @@
; Test vector insertion of register variables.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 insertion into the first element: the scalar in %r2 is expected
; to be inserted in place into %v24 at element 0.
define <16 x i8> @f1(<16 x i8> %val, i8 %element) {
; CHECK-LABEL: f1:
; CHECK: vlvgb %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <16 x i8> %val, i8 %element, i32 0
ret <16 x i8> %ret
}
; Test v16i8 insertion into the last element (index 15 of 16).
define <16 x i8> @f2(<16 x i8> %val, i8 %element) {
; CHECK-LABEL: f2:
; CHECK: vlvgb %v24, %r2, 15
; CHECK: br %r14
%ret = insertelement <16 x i8> %val, i8 %element, i32 15
ret <16 x i8> %ret
}
; Test v16i8 insertion into a variable element: the run-time index in %r3 is
; expected to be used directly as the base of the element-number operand.
define <16 x i8> @f3(<16 x i8> %val, i8 %element, i32 %index) {
; CHECK-LABEL: f3:
; CHECK: vlvgb %v24, %r2, 0(%r3)
; CHECK: br %r14
%ret = insertelement <16 x i8> %val, i8 %element, i32 %index
ret <16 x i8> %ret
}
; Test v8i16 insertion into the first element: the scalar in %r2 is expected
; to be inserted in place into %v24 at element 0.
define <8 x i16> @f4(<8 x i16> %val, i16 %element) {
; CHECK-LABEL: f4:
; CHECK: vlvgh %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <8 x i16> %val, i16 %element, i32 0
ret <8 x i16> %ret
}
; Test v8i16 insertion into the last element (index 7 of 8).
define <8 x i16> @f5(<8 x i16> %val, i16 %element) {
; CHECK-LABEL: f5:
; CHECK: vlvgh %v24, %r2, 7
; CHECK: br %r14
%ret = insertelement <8 x i16> %val, i16 %element, i32 7
ret <8 x i16> %ret
}
; Test v8i16 insertion into a variable element: the run-time index in %r3 is
; expected to be used directly as the base of the element-number operand.
define <8 x i16> @f6(<8 x i16> %val, i16 %element, i32 %index) {
; CHECK-LABEL: f6:
; CHECK: vlvgh %v24, %r2, 0(%r3)
; CHECK: br %r14
%ret = insertelement <8 x i16> %val, i16 %element, i32 %index
ret <8 x i16> %ret
}
; Test v4i32 insertion into the first element: the scalar in %r2 is expected
; to be inserted in place into %v24 at element 0.
define <4 x i32> @f7(<4 x i32> %val, i32 %element) {
; CHECK-LABEL: f7:
; CHECK: vlvgf %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <4 x i32> %val, i32 %element, i32 0
ret <4 x i32> %ret
}
; Test v4i32 insertion into the last element (index 3 of 4).
define <4 x i32> @f8(<4 x i32> %val, i32 %element) {
; CHECK-LABEL: f8:
; CHECK: vlvgf %v24, %r2, 3
; CHECK: br %r14
%ret = insertelement <4 x i32> %val, i32 %element, i32 3
ret <4 x i32> %ret
}
; Test v4i32 insertion into a variable element: the run-time index in %r3 is
; expected to be used directly as the base of the element-number operand.
define <4 x i32> @f9(<4 x i32> %val, i32 %element, i32 %index) {
; CHECK-LABEL: f9:
; CHECK: vlvgf %v24, %r2, 0(%r3)
; CHECK: br %r14
%ret = insertelement <4 x i32> %val, i32 %element, i32 %index
ret <4 x i32> %ret
}
; Test v2i64 insertion into the first element: the scalar in %r2 is expected
; to be inserted in place into %v24 at element 0.
define <2 x i64> @f10(<2 x i64> %val, i64 %element) {
; CHECK-LABEL: f10:
; CHECK: vlvgg %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <2 x i64> %val, i64 %element, i32 0
ret <2 x i64> %ret
}
; Test v2i64 insertion into the last element (index 1 of 2).
define <2 x i64> @f11(<2 x i64> %val, i64 %element) {
; CHECK-LABEL: f11:
; CHECK: vlvgg %v24, %r2, 1
; CHECK: br %r14
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion into a variable element: the run-time index in %r3 is
; expected to be used directly as the base of the element-number operand.
define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) {
; CHECK-LABEL: f12:
; CHECK: vlvgg %v24, %r2, 0(%r3)
; CHECK: br %r14
%ret = insertelement <2 x i64> %val, i64 %element, i32 %index
ret <2 x i64> %ret
}
; Test v16i8 insertion into a variable element plus one: the constant +1 on
; the index is expected to fold into the displacement rather than needing a
; separate add.
define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) {
; CHECK-LABEL: f19:
; CHECK: vlvgb %v24, %r2, 1(%r3)
; CHECK: br %r14
%add = add i32 %index, 1
%ret = insertelement <16 x i8> %val, i8 %element, i32 %add
ret <16 x i8> %ret
}

View File

@ -0,0 +1,161 @@
; Test vector extraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 extraction of the first element: element 0 of %v24 is expected
; to be moved into %r2 by a single vlgvb.
define i8 @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vlgvb %r2, %v24, 0
; CHECK: br %r14
%ret = extractelement <16 x i8> %val, i32 0
ret i8 %ret
}
; Test v16i8 extraction of the last element (index 15 of 16).
define i8 @f2(<16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vlgvb %r2, %v24, 15
; CHECK: br %r14
%ret = extractelement <16 x i8> %val, i32 15
ret i8 %ret
}
; Test v16i8 extractions of an absurd element number.  This must compile
; but we don't care what it does.  The only requirement checked is that the
; out-of-range index 100000 is not emitted literally as the element number.
define i8 @f3(<16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK-NOT: vlgvb %r2, %v24, 100000
; CHECK: br %r14
%ret = extractelement <16 x i8> %val, i32 100000
ret i8 %ret
}
; Test v16i8 extraction of a variable element: the run-time index in %r2 is
; expected to be used as the base of the element-number operand, and the
; result overwrites %r2.
define i8 @f4(<16 x i8> %val, i32 %index) {
; CHECK-LABEL: f4:
; CHECK: vlgvb %r2, %v24, 0(%r2)
; CHECK: br %r14
%ret = extractelement <16 x i8> %val, i32 %index
ret i8 %ret
}
; Test v8i16 extraction of the first element: element 0 of %v24 is expected
; to be moved into %r2 by a single vlgvh.
define i16 @f5(<8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: vlgvh %r2, %v24, 0
; CHECK: br %r14
%ret = extractelement <8 x i16> %val, i32 0
ret i16 %ret
}
; Test v8i16 extraction of the last element (index 7 of 8).
define i16 @f6(<8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: vlgvh %r2, %v24, 7
; CHECK: br %r14
%ret = extractelement <8 x i16> %val, i32 7
ret i16 %ret
}
; Test v8i16 extractions of an absurd element number.  This must compile
; but we don't care what it does.  The only requirement checked is that the
; out-of-range index 100000 is not emitted literally as the element number.
define i16 @f7(<8 x i16> %val) {
; CHECK-LABEL: f7:
; CHECK-NOT: vlgvh %r2, %v24, 100000
; CHECK: br %r14
%ret = extractelement <8 x i16> %val, i32 100000
ret i16 %ret
}
; Test v8i16 extraction of a variable element: the run-time index in %r2 is
; expected to be used as the base of the element-number operand, and the
; result overwrites %r2.
define i16 @f8(<8 x i16> %val, i32 %index) {
; CHECK-LABEL: f8:
; CHECK: vlgvh %r2, %v24, 0(%r2)
; CHECK: br %r14
%ret = extractelement <8 x i16> %val, i32 %index
ret i16 %ret
}
; Test v4i32 extraction of the first element: element 0 of %v24 is expected
; to be moved into %r2 by a single vlgvf.
define i32 @f9(<4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: vlgvf %r2, %v24, 0
; CHECK: br %r14
%ret = extractelement <4 x i32> %val, i32 0
ret i32 %ret
}
; Test v4i32 extraction of the last element (index 3 of 4).
define i32 @f10(<4 x i32> %val) {
; CHECK-LABEL: f10:
; CHECK: vlgvf %r2, %v24, 3
; CHECK: br %r14
%ret = extractelement <4 x i32> %val, i32 3
ret i32 %ret
}
; Test v4i32 extractions of an absurd element number.  This must compile
; but we don't care what it does.  The only requirement checked is that the
; out-of-range index 100000 is not emitted literally as the element number.
define i32 @f11(<4 x i32> %val) {
; CHECK-LABEL: f11:
; CHECK-NOT: vlgvf %r2, %v24, 100000
; CHECK: br %r14
%ret = extractelement <4 x i32> %val, i32 100000
ret i32 %ret
}
; Test v4i32 extraction of a variable element: the run-time index in %r2 is
; expected to be used as the base of the element-number operand, and the
; result overwrites %r2.
define i32 @f12(<4 x i32> %val, i32 %index) {
; CHECK-LABEL: f12:
; CHECK: vlgvf %r2, %v24, 0(%r2)
; CHECK: br %r14
%ret = extractelement <4 x i32> %val, i32 %index
ret i32 %ret
}
; Test v2i64 extraction of the first element: element 0 of %v24 is expected
; to be moved into %r2 by a single vlgvg.
define i64 @f13(<2 x i64> %val) {
; CHECK-LABEL: f13:
; CHECK: vlgvg %r2, %v24, 0
; CHECK: br %r14
%ret = extractelement <2 x i64> %val, i32 0
ret i64 %ret
}
; Test v2i64 extraction of the last element (index 1 of 2).
define i64 @f14(<2 x i64> %val) {
; CHECK-LABEL: f14:
; CHECK: vlgvg %r2, %v24, 1
; CHECK: br %r14
%ret = extractelement <2 x i64> %val, i32 1
ret i64 %ret
}
; Test v2i64 extractions of an absurd element number.  This must compile
; but we don't care what it does.  The only requirement checked is that the
; out-of-range index 100000 is not emitted literally as the element number.
define i64 @f15(<2 x i64> %val) {
; CHECK-LABEL: f15:
; CHECK-NOT: vlgvg %r2, %v24, 100000
; CHECK: br %r14
%ret = extractelement <2 x i64> %val, i32 100000
ret i64 %ret
}
; Test v2i64 extraction of a variable element: the run-time index in %r2 is
; expected to be used as the base of the element-number operand, and the
; result overwrites %r2.
define i64 @f16(<2 x i64> %val, i32 %index) {
; CHECK-LABEL: f16:
; CHECK: vlgvg %r2, %v24, 0(%r2)
; CHECK: br %r14
%ret = extractelement <2 x i64> %val, i32 %index
ret i64 %ret
}
; Test v16i8 extraction of a variable element with an offset: the constant +1
; on the index is expected to fold into the displacement rather than needing
; a separate add.
define i8 @f27(<16 x i8> %val, i32 %index) {
; CHECK-LABEL: f27:
; CHECK: vlgvb %r2, %v24, 1(%r2)
; CHECK: br %r14
%add = add i32 %index, 1
%ret = extractelement <16 x i8> %val, i32 %add
ret i8 %ret
}

View File

@ -0,0 +1,13 @@
; Test vector builds using VLVGP.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the basic v2i64 usage: building a vector from two i64 scalars, %a into
; element 0 and %b into element 1, is expected to use one vlvgp rather than
; two separate element insertions.
define <2 x i64> @f1(i64 %a, i64 %b) {
; CHECK-LABEL: f1:
; CHECK: vlvgp %v24, %r2, %r3
; CHECK: br %r14
%veca = insertelement <2 x i64> undef, i64 %a, i32 0
%vecb = insertelement <2 x i64> %veca, i64 %b, i32 1
ret <2 x i64> %vecb
}

View File

@ -0,0 +1,39 @@
; Test scalar_to_vector expansion.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8: inserting a scalar into element 0 of an undef vector is
; expected to become a single vlvgb; the other elements stay undefined.
define <16 x i8> @f1(i8 %val) {
; CHECK-LABEL: f1:
; CHECK: vlvgb %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <16 x i8> undef, i8 %val, i32 0
ret <16 x i8> %ret
}
; Test v8i16: inserting a scalar into element 0 of an undef vector is
; expected to become a single vlvgh; the other elements stay undefined.
define <8 x i16> @f2(i16 %val) {
; CHECK-LABEL: f2:
; CHECK: vlvgh %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <8 x i16> undef, i16 %val, i32 0
ret <8 x i16> %ret
}
; Test v4i32: inserting a scalar into element 0 of an undef vector is
; expected to become a single vlvgf; the other elements stay undefined.
define <4 x i32> @f3(i32 %val) {
; CHECK-LABEL: f3:
; CHECK: vlvgf %v24, %r2, 0
; CHECK: br %r14
%ret = insertelement <4 x i32> undef, i32 %val, i32 0
ret <4 x i32> %ret
}
; Test v2i64.  Here we load %val into both halves: only element 0 is defined
; by the IR, and the expected vlvgp passes %r2 for both elements.
define <2 x i64> @f4(i64 %val) {
; CHECK-LABEL: f4:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK: br %r14
%ret = insertelement <2 x i64> undef, i64 %val, i32 0
ret <2 x i64> %ret
}

View File

@ -0,0 +1,284 @@
; Test vector insertion of memory values.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 insertion into the first element: the scalar load and the
; insertion are expected to combine into one vleb straight from memory.
define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vleb %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i8, i8 *%ptr
%ret = insertelement <16 x i8> %val, i8 %element, i32 0
ret <16 x i8> %ret
}
; Test v16i8 insertion into the last element (index 15 of 16), loading the
; scalar directly from memory.
define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vleb %v24, 0(%r2), 15
; CHECK: br %r14
%element = load i8, i8 *%ptr
%ret = insertelement <16 x i8> %val, i8 %element, i32 15
ret <16 x i8> %ret
}
; Test v16i8 insertion with the highest in-range offset: a byte offset of
; 4095 should fold into the vleb displacement.
define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
; CHECK-LABEL: f3:
; CHECK: vleb %v24, 4095(%r2), 10
; CHECK: br %r14
%ptr = getelementptr i8, i8 *%base, i32 4095
%element = load i8, i8 *%ptr
%ret = insertelement <16 x i8> %val, i8 %element, i32 10
ret <16 x i8> %ret
}
; Test v16i8 insertion with the first out-of-range offset: a byte offset of
; 4096 is expected to be added to the base separately before the vleb.
define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: vleb %v24, 0(%r2), 5
; CHECK: br %r14
%ptr = getelementptr i8, i8 *%base, i32 4096
%element = load i8, i8 *%ptr
%ret = insertelement <16 x i8> %val, i8 %element, i32 5
ret <16 x i8> %ret
}
; Test v16i8 insertion into a variable element: with a run-time element
; index, the load-and-insert form vleb must not be used.
define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
; CHECK-LABEL: f5:
; CHECK-NOT: vleb
; CHECK: br %r14
%element = load i8, i8 *%ptr
%ret = insertelement <16 x i8> %val, i8 %element, i32 %index
ret <16 x i8> %ret
}
; Test v8i16 insertion into the first element: the scalar load and the
; insertion are expected to combine into one vleh straight from memory.
define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vleh %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i16, i16 *%ptr
%ret = insertelement <8 x i16> %val, i16 %element, i32 0
ret <8 x i16> %ret
}
; Test v8i16 insertion into the last element (index 7 of 8), loading the
; scalar directly from memory.
define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vleh %v24, 0(%r2), 7
; CHECK: br %r14
%element = load i16, i16 *%ptr
%ret = insertelement <8 x i16> %val, i16 %element, i32 7
ret <8 x i16> %ret
}
; Test v8i16 insertion with the highest in-range offset: i16 element 2047 is
; 2047 * 2 = 4094 bytes past %base and should fold into the displacement.
define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f8:
; CHECK: vleh %v24, 4094(%r2), 5
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2047
%element = load i16, i16 *%ptr
%ret = insertelement <8 x i16> %val, i16 %element, i32 5
ret <8 x i16> %ret
}
; Test v8i16 insertion with the first out-of-range offset: i16 element 2048
; is 4096 bytes past %base, which is expected to be added to the base
; separately before the vleh.
define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vleh %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2048
%element = load i16, i16 *%ptr
%ret = insertelement <8 x i16> %val, i16 %element, i32 1
ret <8 x i16> %ret
}
; Test v8i16 insertion into a variable element: with a run-time element
; index, the load-and-insert form vleh must not be used.
define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f10:
; CHECK-NOT: vleh
; CHECK: br %r14
%element = load i16, i16 *%ptr
%ret = insertelement <8 x i16> %val, i16 %element, i32 %index
ret <8 x i16> %ret
}
; Test v4i32 insertion into the first element: the scalar load and the
; insertion are expected to combine into one vlef straight from memory.
define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f11:
; CHECK: vlef %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 0
ret <4 x i32> %ret
}
; Test v4i32 insertion into the last element (index 3 of 4), loading the
; scalar directly from memory.
define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vlef %v24, 0(%r2), 3
; CHECK: br %r14
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 3
ret <4 x i32> %ret
}
; Test v4i32 insertion with the highest in-range offset: i32 element 1023 is
; 1023 * 4 = 4092 bytes past %base and should fold into the displacement.
define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f13:
; CHECK: vlef %v24, 4092(%r2), 2
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1023
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 2
ret <4 x i32> %ret
}
; Test v4i32 insertion with the first out-of-range offset: i32 element 1024
; is 4096 bytes past %base, which is expected to be added to the base
; separately before the vlef.
define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f14:
; CHECK: aghi %r2, 4096
; CHECK: vlef %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1024
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 1
ret <4 x i32> %ret
}
; Test v4i32 insertion into a variable element: with a run-time element
; index, the load-and-insert form vlef must not be used.
define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f15:
; CHECK-NOT: vlef
; CHECK: br %r14
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 %index
ret <4 x i32> %ret
}
; Test v2i64 insertion into the first element: the scalar load and the
; insertion are expected to combine into one vleg straight from memory.
define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f16:
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 0
ret <2 x i64> %ret
}
; Test v2i64 insertion into the last element (index 1 of 2), loading the
; scalar directly from memory.
define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f17:
; CHECK: vleg %v24, 0(%r2), 1
; CHECK: br %r14
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion with the highest in-range offset: i64 element 511 is
; 511 * 8 = 4088 bytes past %base and should fold into the displacement.
define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f18:
; CHECK: vleg %v24, 4088(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 511
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion with the first out-of-range offset: i64 element 512 is
; 4096 bytes past %base, which is expected to be added to the base
; separately before the vleg.
define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f19:
; CHECK: aghi %r2, 4096
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 512
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 0
ret <2 x i64> %ret
}
; Test v2i64 insertion into a variable element: with a run-time element
; index, the load-and-insert form vleg must not be used.
define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK-NOT: vleg
; CHECK: br %r14
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 %index
ret <2 x i64> %ret
}
; Test a v4i32 gather of the first element: the address is %base plus the
; zero-extended element 0 of %index, and the loaded value goes back into
; element 0 of %val.  The whole sequence is expected to become one vgef.
define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f31:
; CHECK: vgef %v24, 0(%v26,%r2), 0
; CHECK: br %r14
%elem = extractelement <4 x i32> %index, i32 0
%ext = zext i32 %elem to i64
%add = add i64 %base, %ext
%ptr = inttoptr i64 %add to i32 *
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 0
ret <4 x i32> %ret
}
; Test a v4i32 gather of the last element: the address is %base plus the
; zero-extended element 3 of %index, and the loaded value goes back into
; element 3 of %val.  The whole sequence is expected to become one vgef.
define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f32:
; CHECK: vgef %v24, 0(%v26,%r2), 3
; CHECK: br %r14
%elem = extractelement <4 x i32> %index, i32 3
%ext = zext i32 %elem to i64
%add = add i64 %base, %ext
%ptr = inttoptr i64 %add to i32 *
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 3
ret <4 x i32> %ret
}
; Test a v4i32 gather with the highest in-range offset: the extra constant
; 4095 added to the address is expected to fold into the vgef displacement.
; The same element (1) is used for both the index and the destination.
define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f33:
; CHECK: vgef %v24, 4095(%v26,%r2), 1
; CHECK: br %r14
%elem = extractelement <4 x i32> %index, i32 1
%ext = zext i32 %elem to i64
%add1 = add i64 %base, %ext
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i32 *
%element = load i32, i32 *%ptr
%ret = insertelement <4 x i32> %val, i32 %element, i32 1
ret <4 x i32> %ret
}
; Test a v2i64 gather of the first element: the address is %base plus
; element 0 of %index (already 64 bits, so no extension), and the loaded
; value goes back into element 0 of %val.  One vgeg is expected.
define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f34:
; CHECK: vgeg %v24, 0(%v26,%r2), 0
; CHECK: br %r14
%elem = extractelement <2 x i64> %index, i32 0
%add = add i64 %base, %elem
%ptr = inttoptr i64 %add to i64 *
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 0
ret <2 x i64> %ret
}
; Test a v2i64 gather of the last element: the address is %base plus
; element 1 of %index (already 64 bits, so no extension), and the loaded
; value goes back into element 1 of %val.  One vgeg is expected.
define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f35:
; CHECK: vgeg %v24, 0(%v26,%r2), 1
; CHECK: br %r14
%elem = extractelement <2 x i64> %index, i32 1
%add = add i64 %base, %elem
%ptr = inttoptr i64 %add to i64 *
%element = load i64, i64 *%ptr
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
ret <2 x i64> %ret
}

View File

@ -0,0 +1,237 @@
; Test vector insertion of constants.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Insert the constant 0 into element 0 (the first element) of a v16i8.
define <16 x i8> @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vleib %v24, 0, 0
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 0, i32 0
  ret <16 x i8> %result
}
; Insert the constant 100 into element 15 (the last element) of a v16i8.
define <16 x i8> @f2(<16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vleib %v24, 100, 15
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 100, i32 15
  ret <16 x i8> %result
}
; Insert 127, the largest signed i8 value, into a v16i8.
define <16 x i8> @f3(<16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: vleib %v24, 127, 10
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 127, i32 10
  ret <16 x i8> %result
}
; Insert the smallest signed i8 value into a v16i8.  The IR spells it as 128;
; the instruction is expected to show it as -128.
define <16 x i8> @f4(<16 x i8> %val) {
; CHECK-LABEL: f4:
; CHECK: vleib %v24, -128, 11
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 128, i32 11
  ret <16 x i8> %result
}
; Insert the largest unsigned i8 value (255) into a v16i8.  The instruction
; is expected to show it as -1.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vleib %v24, -1, 12
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 255, i32 12
  ret <16 x i8> %result
}
; Insert a constant into a v16i8 at a run-time index; VLEIB must not be used.
define <16 x i8> @f6(<16 x i8> %val, i32 %index) {
; CHECK-LABEL: f6:
; CHECK-NOT: vleib
; CHECK: br %r14
  %result = insertelement <16 x i8> %val, i8 0, i32 %index
  ret <16 x i8> %result
}
; Insert the constant 0 into element 0 (the first element) of a v8i16.
define <8 x i16> @f7(<8 x i16> %val) {
; CHECK-LABEL: f7:
; CHECK: vleih %v24, 0, 0
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 0, i32 0
  ret <8 x i16> %result
}
; Insert the constant 0 into element 7 (the last element) of a v8i16.
define <8 x i16> @f8(<8 x i16> %val) {
; CHECK-LABEL: f8:
; CHECK: vleih %v24, 0, 7
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 0, i32 7
  ret <8 x i16> %result
}
; Insert 32767, the largest signed i16 value, into a v8i16.
define <8 x i16> @f9(<8 x i16> %val) {
; CHECK-LABEL: f9:
; CHECK: vleih %v24, 32767, 4
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 32767, i32 4
  ret <8 x i16> %result
}
; Insert the smallest signed i16 value into a v8i16.  The IR spells it as
; 32768; the instruction is expected to show it as -32768.
define <8 x i16> @f10(<8 x i16> %val) {
; CHECK-LABEL: f10:
; CHECK: vleih %v24, -32768, 5
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 32768, i32 5
  ret <8 x i16> %result
}
; Insert the largest unsigned i16 value (65535) into a v8i16.  The
; instruction is expected to show it as -1.
define <8 x i16> @f11(<8 x i16> %val) {
; CHECK-LABEL: f11:
; CHECK: vleih %v24, -1, 6
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 65535, i32 6
  ret <8 x i16> %result
}
; Insert a constant into a v8i16 at a run-time index; VLEIH must not be used.
define <8 x i16> @f12(<8 x i16> %val, i32 %index) {
; CHECK-LABEL: f12:
; CHECK-NOT: vleih
; CHECK: br %r14
  %result = insertelement <8 x i16> %val, i16 0, i32 %index
  ret <8 x i16> %result
}
; Insert the constant 0 into element 0 (the first element) of a v4i32.
define <4 x i32> @f13(<4 x i32> %val) {
; CHECK-LABEL: f13:
; CHECK: vleif %v24, 0, 0
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 0, i32 0
  ret <4 x i32> %result
}
; Insert the constant 0 into element 3 (the last element) of a v4i32.
define <4 x i32> @f14(<4 x i32> %val) {
; CHECK-LABEL: f14:
; CHECK: vleif %v24, 0, 3
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 0, i32 3
  ret <4 x i32> %result
}
; Insert 32767 into a v4i32: the largest immediate VLEIF accepts.
define <4 x i32> @f15(<4 x i32> %val) {
; CHECK-LABEL: f15:
; CHECK: vleif %v24, 32767, 1
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 32767, i32 1
  ret <4 x i32> %result
}
; Insert 32768 into a v4i32: one above the VLEIF maximum, so VLEIF must not
; be used.
define <4 x i32> @f16(<4 x i32> %val) {
; CHECK-LABEL: f16:
; CHECK-NOT: vleif
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 32768, i32 1
  ret <4 x i32> %result
}
; Insert -32768 into a v4i32: the smallest immediate VLEIF accepts.
define <4 x i32> @f17(<4 x i32> %val) {
; CHECK-LABEL: f17:
; CHECK: vleif %v24, -32768, 2
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 -32768, i32 2
  ret <4 x i32> %result
}
; Insert -32769 into a v4i32: one below the VLEIF minimum, so VLEIF must not
; be used.
define <4 x i32> @f18(<4 x i32> %val) {
; CHECK-LABEL: f18:
; CHECK-NOT: vleif
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 -32769, i32 2
  ret <4 x i32> %result
}
; Insert a constant into a v4i32 at a run-time index; VLEIF must not be used.
define <4 x i32> @f19(<4 x i32> %val, i32 %index) {
; CHECK-LABEL: f19:
; CHECK-NOT: vleif
; CHECK: br %r14
  %result = insertelement <4 x i32> %val, i32 0, i32 %index
  ret <4 x i32> %result
}
; Insert the constant 0 into element 0 (the first element) of a v2i64.
define <2 x i64> @f20(<2 x i64> %val) {
; CHECK-LABEL: f20:
; CHECK: vleig %v24, 0, 0
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 0, i32 0
  ret <2 x i64> %result
}
; Insert the constant 0 into element 1 (the last element) of a v2i64.
define <2 x i64> @f21(<2 x i64> %val) {
; CHECK-LABEL: f21:
; CHECK: vleig %v24, 0, 1
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 0, i32 1
  ret <2 x i64> %result
}
; Insert 32767 into a v2i64: the largest immediate VLEIG accepts.
define <2 x i64> @f22(<2 x i64> %val) {
; CHECK-LABEL: f22:
; CHECK: vleig %v24, 32767, 1
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 32767, i32 1
  ret <2 x i64> %result
}
; Insert 32768 into a v2i64: one above the VLEIG maximum, so VLEIG must not
; be used.
define <2 x i64> @f23(<2 x i64> %val) {
; CHECK-LABEL: f23:
; CHECK-NOT: vleig
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 32768, i32 1
  ret <2 x i64> %result
}
; Insert -32768 into a v2i64: the smallest immediate VLEIG accepts.
define <2 x i64> @f24(<2 x i64> %val) {
; CHECK-LABEL: f24:
; CHECK: vleig %v24, -32768, 0
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 -32768, i32 0
  ret <2 x i64> %result
}
; Insert -32769 into a v2i64: one below the VLEIG minimum, so VLEIG must not
; be used.
define <2 x i64> @f25(<2 x i64> %val) {
; CHECK-LABEL: f25:
; CHECK-NOT: vleig
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 -32769, i32 0
  ret <2 x i64> %result
}
; Insert a constant into a v2i64 at a run-time index; VLEIG must not be used.
define <2 x i64> @f26(<2 x i64> %val, i32 %index) {
; CHECK-LABEL: f26:
; CHECK-NOT: vleig
; CHECK: br %r14
  %result = insertelement <2 x i64> %val, i64 0, i32 %index
  ret <2 x i64> %result
}

View File

@ -0,0 +1,328 @@
; Test vector extraction to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Store element 0 (the first element) of a v16i8 to memory.
define void @f1(<16 x i8> %val, i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vsteb %v24, 0(%r2), 0
; CHECK: br %r14
  %lane = extractelement <16 x i8> %val, i32 0
  store i8 %lane, i8 *%ptr
  ret void
}
; Store element 15 (the last element) of a v16i8 to memory.
define void @f2(<16 x i8> %val, i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vsteb %v24, 0(%r2), 15
; CHECK: br %r14
  %lane = extractelement <16 x i8> %val, i32 15
  store i8 %lane, i8 *%ptr
  ret void
}
; Extract element 16 of a v16i8, which is out of range.  The code only has
; to compile; the result does not matter, as long as no VSTEB with element
; number 16 is produced.
define void @f3(<16 x i8> %val, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: vsteb %v24, 0(%r2), 16
; CHECK: br %r14
  %lane = extractelement <16 x i8> %val, i32 16
  store i8 %lane, i8 *%ptr
  ret void
}
; Store a v16i8 element at displacement 4095, the highest in-range offset.
define void @f4(<16 x i8> %val, i8 *%base) {
; CHECK-LABEL: f4:
; CHECK: vsteb %v24, 4095(%r2), 10
; CHECK: br %r14
  %dest = getelementptr i8, i8 *%base, i32 4095
  %lane = extractelement <16 x i8> %val, i32 10
  store i8 %lane, i8 *%dest
  ret void
}
; Store a v16i8 element at displacement 4096, the first out-of-range offset;
; the base register is expected to be adjusted separately.
define void @f5(<16 x i8> %val, i8 *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, 4096
; CHECK: vsteb %v24, 0(%r2), 5
; CHECK: br %r14
  %dest = getelementptr i8, i8 *%base, i32 4096
  %lane = extractelement <16 x i8> %val, i32 5
  store i8 %lane, i8 *%dest
  ret void
}
; Store a v16i8 element chosen by a run-time index; VSTEB must not be used.
define void @f6(<16 x i8> %val, i8 *%ptr, i32 %index) {
; CHECK-LABEL: f6:
; CHECK-NOT: vsteb
; CHECK: br %r14
  %lane = extractelement <16 x i8> %val, i32 %index
  store i8 %lane, i8 *%ptr
  ret void
}
; Store element 0 (the first element) of a v8i16 to memory.
define void @f7(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vsteh %v24, 0(%r2), 0
; CHECK: br %r14
  %lane = extractelement <8 x i16> %val, i32 0
  store i16 %lane, i16 *%ptr
  ret void
}
; Store element 7 (the last element) of a v8i16 to memory.
define void @f8(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vsteh %v24, 0(%r2), 7
; CHECK: br %r14
  %lane = extractelement <8 x i16> %val, i32 7
  store i16 %lane, i16 *%ptr
  ret void
}
; Extract element 8 of a v8i16, which is out of range.  The code only has
; to compile; the result does not matter, as long as no VSTEH with element
; number 8 is produced.
define void @f9(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f9:
; CHECK-NOT: vsteh %v24, 0(%r2), 8
; CHECK: br %r14
  %lane = extractelement <8 x i16> %val, i32 8
  store i16 %lane, i16 *%ptr
  ret void
}
; Store a v8i16 element at displacement 4094 (i16 index 2047), the highest
; in-range offset.
define void @f10(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f10:
; CHECK: vsteh %v24, 4094(%r2), 5
; CHECK: br %r14
  %dest = getelementptr i16, i16 *%base, i32 2047
  %lane = extractelement <8 x i16> %val, i32 5
  store i16 %lane, i16 *%dest
  ret void
}
; Store a v8i16 element at displacement 4096 (i16 index 2048), the first
; out-of-range offset; the base register is expected to be adjusted
; separately.
define void @f11(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f11:
; CHECK: aghi %r2, 4096
; CHECK: vsteh %v24, 0(%r2), 1
; CHECK: br %r14
  %dest = getelementptr i16, i16 *%base, i32 2048
  %lane = extractelement <8 x i16> %val, i32 1
  store i16 %lane, i16 *%dest
  ret void
}
; Store a v8i16 element chosen by a run-time index; VSTEH must not be used.
define void @f12(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f12:
; CHECK-NOT: vsteh
; CHECK: br %r14
  %lane = extractelement <8 x i16> %val, i32 %index
  store i16 %lane, i16 *%ptr
  ret void
}
; Store element 0 (the first element) of a v4i32 to memory.
define void @f13(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f13:
; CHECK: vstef %v24, 0(%r2), 0
; CHECK: br %r14
  %lane = extractelement <4 x i32> %val, i32 0
  store i32 %lane, i32 *%ptr
  ret void
}
; Store element 3 (the last element) of a v4i32 to memory.
define void @f14(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f14:
; CHECK: vstef %v24, 0(%r2), 3
; CHECK: br %r14
  %lane = extractelement <4 x i32> %val, i32 3
  store i32 %lane, i32 *%ptr
  ret void
}
; Extract element 4 of a v4i32, which is out of range.  The code only has
; to compile; the result does not matter, as long as no VSTEF with element
; number 4 is produced.
define void @f15(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f15:
; CHECK-NOT: vstef %v24, 0(%r2), 4
; CHECK: br %r14
  %lane = extractelement <4 x i32> %val, i32 4
  store i32 %lane, i32 *%ptr
  ret void
}
; Store a v4i32 element at displacement 4092 (i32 index 1023), the highest
; in-range offset.
define void @f16(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f16:
; CHECK: vstef %v24, 4092(%r2), 2
; CHECK: br %r14
  %dest = getelementptr i32, i32 *%base, i32 1023
  %lane = extractelement <4 x i32> %val, i32 2
  store i32 %lane, i32 *%dest
  ret void
}
; Store a v4i32 element at displacement 4096 (i32 index 1024), the first
; out-of-range offset; the base register is expected to be adjusted
; separately.
define void @f17(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f17:
; CHECK: aghi %r2, 4096
; CHECK: vstef %v24, 0(%r2), 1
; CHECK: br %r14
  %dest = getelementptr i32, i32 *%base, i32 1024
  %lane = extractelement <4 x i32> %val, i32 1
  store i32 %lane, i32 *%dest
  ret void
}
; Store a v4i32 element chosen by a run-time index; VSTEF must not be used.
define void @f18(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f18:
; CHECK-NOT: vstef
; CHECK: br %r14
  %lane = extractelement <4 x i32> %val, i32 %index
  store i32 %lane, i32 *%ptr
  ret void
}
; Store element 0 (the first element) of a v2i64 to memory.
define void @f19(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f19:
; CHECK: vsteg %v24, 0(%r2), 0
; CHECK: br %r14
  %lane = extractelement <2 x i64> %val, i32 0
  store i64 %lane, i64 *%ptr
  ret void
}
; Store element 1 (the last element) of a v2i64 to memory.
define void @f20(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f20:
; CHECK: vsteg %v24, 0(%r2), 1
; CHECK: br %r14
  %lane = extractelement <2 x i64> %val, i32 1
  store i64 %lane, i64 *%ptr
  ret void
}
; Extract element 2 of a v2i64, which is out of range.  The code only has
; to compile; the result does not matter, as long as no VSTEG with element
; number 2 is produced.
define void @f21(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f21:
; CHECK-NOT: vsteg %v24, 0(%r2), 2
; CHECK: br %r14
  %lane = extractelement <2 x i64> %val, i32 2
  store i64 %lane, i64 *%ptr
  ret void
}
; Store a v2i64 element at displacement 4088 (i64 index 511), the highest
; in-range offset.
define void @f22(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f22:
; CHECK: vsteg %v24, 4088(%r2), 1
; CHECK: br %r14
  %dest = getelementptr i64, i64 *%base, i32 511
  %lane = extractelement <2 x i64> %val, i32 1
  store i64 %lane, i64 *%dest
  ret void
}
; Store a v2i64 element at displacement 4096 (i64 index 512), the first
; out-of-range offset; the base register is expected to be adjusted
; separately.
define void @f23(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f23:
; CHECK: aghi %r2, 4096
; CHECK: vsteg %v24, 0(%r2), 0
; CHECK: br %r14
  %dest = getelementptr i64, i64 *%base, i32 512
  %lane = extractelement <2 x i64> %val, i32 0
  store i64 %lane, i64 *%dest
  ret void
}
; Store a v2i64 element chosen by a run-time index; VSTEG must not be used.
define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f24:
; CHECK-NOT: vsteg
; CHECK: br %r14
  %lane = extractelement <2 x i64> %val, i32 %index
  store i64 %lane, i64 *%ptr
  ret void
}
; Scatter lane 0 (the first lane) of a v4i32, taking the address offset from
; lane 0 of %index.
define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f37:
; CHECK: vscef %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 0
  %idx64 = zext i32 %idx to i64
  %addr = add i64 %base, %idx64
  %dest = inttoptr i64 %addr to i32 *
  %lane = extractelement <4 x i32> %val, i32 0
  store i32 %lane, i32 *%dest
  ret void
}
; Scatter lane 3 (the last lane) of a v4i32, taking the address offset from
; lane 3 of %index.
define void @f38(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f38:
; CHECK: vscef %v24, 0(%v26,%r2), 3
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 3
  %idx64 = zext i32 %idx to i64
  %addr = add i64 %base, %idx64
  %dest = inttoptr i64 %addr to i32 *
  %lane = extractelement <4 x i32> %val, i32 3
  store i32 %lane, i32 *%dest
  ret void
}
; Scatter lane 1 of a v4i32 with an extra displacement of 4095, the highest
; in-range offset.
define void @f39(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f39:
; CHECK: vscef %v24, 4095(%v26,%r2), 1
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 1
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = add i64 %sum, 4095
  %dest = inttoptr i64 %addr to i32 *
  %lane = extractelement <4 x i32> %val, i32 1
  store i32 %lane, i32 *%dest
  ret void
}
; Scatter lane 0 (the first lane) of a v2i64, taking the address offset from
; lane 0 of %index.
define void @f40(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f40:
; CHECK: vsceg %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 0
  %addr = add i64 %base, %idx
  %dest = inttoptr i64 %addr to i64 *
  %lane = extractelement <2 x i64> %val, i32 0
  store i64 %lane, i64 *%dest
  ret void
}
; Scatter lane 1 (the last lane) of a v2i64, taking the address offset from
; lane 1 of %index.
define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f41:
; CHECK: vsceg %v24, 0(%v26,%r2), 1
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 1
  %addr = add i64 %base, %idx
  %dest = inttoptr i64 %addr to i64 *
  %lane = extractelement <2 x i64> %val, i32 1
  store i64 %lane, i64 *%dest
  ret void
}

View File

@ -0,0 +1,93 @@
; Test insertions of register values into a nonzero index of an undef.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Insert a register value into an undef v16i8 at an arbitrary index (12).
define <16 x i8> @f1(i8 %val) {
; CHECK-LABEL: f1:
; CHECK: vlvgb %v24, %r2, 12
; CHECK-NEXT: br %r14
  %result = insertelement <16 x i8> undef, i8 %val, i32 12
  ret <16 x i8> %result
}
; Insert a register value into an undef v16i8 at index 7, the first index
; that is good for VLVGP.
define <16 x i8> @f2(i8 %val) {
; CHECK-LABEL: f2:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <16 x i8> undef, i8 %val, i32 7
  ret <16 x i8> %result
}
; Insert a register value into an undef v16i8 at index 15, the second index
; that is good for VLVGP.
define <16 x i8> @f3(i8 %val) {
; CHECK-LABEL: f3:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <16 x i8> undef, i8 %val, i32 15
  ret <16 x i8> %result
}
; Insert a register value into an undef v8i16 at an arbitrary index (5).
define <8 x i16> @f4(i16 %val) {
; CHECK-LABEL: f4:
; CHECK: vlvgh %v24, %r2, 5
; CHECK-NEXT: br %r14
  %result = insertelement <8 x i16> undef, i16 %val, i32 5
  ret <8 x i16> %result
}
; Insert a register value into an undef v8i16 at index 3, the first index
; that is good for VLVGP.
define <8 x i16> @f5(i16 %val) {
; CHECK-LABEL: f5:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <8 x i16> undef, i16 %val, i32 3
  ret <8 x i16> %result
}
; Insert a register value into an undef v8i16 at index 7, the second index
; that is good for VLVGP.
define <8 x i16> @f6(i16 %val) {
; CHECK-LABEL: f6:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <8 x i16> undef, i16 %val, i32 7
  ret <8 x i16> %result
}
; Insert a register value into an undef v4i32 at an arbitrary index (2).
define <4 x i32> @f7(i32 %val) {
; CHECK-LABEL: f7:
; CHECK: vlvgf %v24, %r2, 2
; CHECK-NEXT: br %r14
  %result = insertelement <4 x i32> undef, i32 %val, i32 2
  ret <4 x i32> %result
}
; Insert a register value into an undef v4i32 at index 1, the first index
; that is good for VLVGP.
define <4 x i32> @f8(i32 %val) {
; CHECK-LABEL: f8:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <4 x i32> undef, i32 %val, i32 1
  ret <4 x i32> %result
}
; Insert a register value into an undef v4i32 at index 3, the second index
; that is good for VLVGP.
define <4 x i32> @f9(i32 %val) {
; CHECK-LABEL: f9:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <4 x i32> undef, i32 %val, i32 3
  ret <4 x i32> %result
}
; Insert a register value into element 1 of an undef v2i64.
define <2 x i64> @f10(i64 %val) {
; CHECK-LABEL: f10:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK-NEXT: br %r14
  %result = insertelement <2 x i64> undef, i64 %val, i32 1
  ret <2 x i64> %result
}

View File

@ -0,0 +1,103 @@
; Test insertions of memory values into a nonzero index of an undef.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Load an i8 and insert it into an undef v16i8 at an arbitrary index (12).
define <16 x i8> @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlrepb %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i8, i8 *%ptr
  %result = insertelement <16 x i8> undef, i8 %loaded, i32 12
  ret <16 x i8> %result
}
; Load an i8 and insert it into an undef v16i8 at index 7, the first index
; that is good for VLVGP.  Either VLREPB or VLLEZB is accepted.
define <16 x i8> @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: {{vlrepb|vllezb}} %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i8, i8 *%ptr
  %result = insertelement <16 x i8> undef, i8 %loaded, i32 7
  ret <16 x i8> %result
}
; Load an i8 and insert it into an undef v16i8 at index 15, the second index
; that is good for VLVGP.
define <16 x i8> @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vlrepb %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i8, i8 *%ptr
  %result = insertelement <16 x i8> undef, i8 %loaded, i32 15
  ret <16 x i8> %result
}
; Load an i16 and insert it into an undef v8i16 at an arbitrary index (5).
define <8 x i16> @f4(i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlreph %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i16, i16 *%ptr
  %result = insertelement <8 x i16> undef, i16 %loaded, i32 5
  ret <8 x i16> %result
}
; Load an i16 and insert it into an undef v8i16 at index 3, the first index
; that is good for VLVGP.  Either VLREPH or VLLEZH is accepted.
define <8 x i16> @f5(i16 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: {{vlreph|vllezh}} %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i16, i16 *%ptr
  %result = insertelement <8 x i16> undef, i16 %loaded, i32 3
  ret <8 x i16> %result
}
; Load an i16 and insert it into an undef v8i16 at index 7, the second index
; that is good for VLVGP.
define <8 x i16> @f6(i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlreph %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i16, i16 *%ptr
  %result = insertelement <8 x i16> undef, i16 %loaded, i32 7
  ret <8 x i16> %result
}
; Load an i32 and insert it into an undef v4i32 at an arbitrary index (2).
define <4 x i32> @f7(i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vlrepf %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i32, i32 *%ptr
  %result = insertelement <4 x i32> undef, i32 %loaded, i32 2
  ret <4 x i32> %result
}
; Load an i32 and insert it into an undef v4i32 at index 1, the first index
; that is good for VLVGP.  Either VLREPF or VLLEZF is accepted.
define <4 x i32> @f8(i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: {{vlrepf|vllezf}} %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i32, i32 *%ptr
  %result = insertelement <4 x i32> undef, i32 %loaded, i32 1
  ret <4 x i32> %result
}
; Load an i32 and insert it into an undef v4i32 at index 3, the second index
; that is good for VLVGP.
define <4 x i32> @f9(i32 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vlrepf %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i32, i32 *%ptr
  %result = insertelement <4 x i32> undef, i32 %loaded, i32 3
  ret <4 x i32> %result
}
; Load an i64 and insert it into element 1 of an undef v2i64.
define <2 x i64> @f10(i64 *%ptr) {
; CHECK-LABEL: f10:
; CHECK: vlrepg %v24, 0(%r2)
; CHECK-NEXT: br %r14
  %loaded = load i64, i64 *%ptr
  %result = insertelement <2 x i64> undef, i64 %loaded, i32 1
  ret <2 x i64> %result
}

View File

@ -0,0 +1,47 @@
; Test insertions of register values into 0.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Insert two register values into an all-zero v16i8 (elements 2 and 12).
; The two element inserts may appear in either order.
define <16 x i8> @f1(i8 %val1, i8 %val2) {
; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK-DAG: vlvgb %v24, %r2, 2
; CHECK-DAG: vlvgb %v24, %r3, 12
; CHECK: br %r14
  %first = insertelement <16 x i8> zeroinitializer, i8 %val1, i32 2
  %both = insertelement <16 x i8> %first, i8 %val2, i32 12
  ret <16 x i8> %both
}
; Insert two register values into an all-zero v8i16 (elements 3 and 5).
; The two element inserts may appear in either order.
define <8 x i16> @f2(i16 %val1, i16 %val2) {
; CHECK-LABEL: f2:
; CHECK: vgbm %v24, 0
; CHECK-DAG: vlvgh %v24, %r2, 3
; CHECK-DAG: vlvgh %v24, %r3, 5
; CHECK: br %r14
  %first = insertelement <8 x i16> zeroinitializer, i16 %val1, i32 3
  %both = insertelement <8 x i16> %first, i16 %val2, i32 5
  ret <8 x i16> %both
}
; Insert a register value into element 3 of an all-zero v4i32.
define <4 x i32> @f3(i32 %val) {
; CHECK-LABEL: f3:
; CHECK: vgbm %v24, 0
; CHECK: vlvgf %v24, %r2, 3
; CHECK: br %r14
  %result = insertelement <4 x i32> zeroinitializer, i32 %val, i32 3
  ret <4 x i32> %result
}
; Insert a register value into element 1 of an all-zero v2i64.  The zero is
; expected to be materialized in a GPR and paired with %val by VLVGP.
define <2 x i64> @f4(i64 %val) {
; CHECK-LABEL: f4:
; CHECK: lghi [[REG:%r[0-5]]], 0
; CHECK: vlvgp %v24, [[REG]], %r2
; CHECK: br %r14
  %result = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1
  ret <2 x i64> %result
}

View File

@ -0,0 +1,76 @@
; Test insertions of memory values into 0.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Basic VLLEZB case: load an i8 into element 7 of an all-zero v16i8.
define <16 x i8> @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vllezb %v24, 0(%r2)
; CHECK: br %r14
  %loaded = load i8, i8 *%ptr
  %result = insertelement <16 x i8> zeroinitializer, i8 %loaded, i32 7
  ret <16 x i8> %result
}
; VLLEZB at displacement 4095, the highest in-range offset.
define <16 x i8> @f2(i8 *%base) {
; CHECK-LABEL: f2:
; CHECK: vllezb %v24, 4095(%r2)
; CHECK: br %r14
  %src = getelementptr i8, i8 *%base, i64 4095
  %loaded = load i8, i8 *%src
  %result = insertelement <16 x i8> zeroinitializer, i8 %loaded, i32 7
  ret <16 x i8> %result
}
; VLLEZB at displacement 4096, one past the in-range maximum; the offset
; must not be folded into the instruction.
define <16 x i8> @f3(i8 *%base) {
; CHECK-LABEL: f3:
; CHECK-NOT: vllezb %v24, 4096(%r2)
; CHECK: br %r14
  %src = getelementptr i8, i8 *%base, i64 4096
  %loaded = load i8, i8 *%src
  %result = insertelement <16 x i8> zeroinitializer, i8 %loaded, i32 7
  ret <16 x i8> %result
}
; VLLEZB with a base plus index-register address; the two registers may
; appear in either order.
define <16 x i8> @f4(i8 *%base, i64 %index) {
; CHECK-LABEL: f4:
; CHECK: vllezb %v24, 0({{%r2,%r3|%r3,%r2}})
; CHECK: br %r14
  %src = getelementptr i8, i8 *%base, i64 %index
  %loaded = load i8, i8 *%src
  %result = insertelement <16 x i8> zeroinitializer, i8 %loaded, i32 7
  ret <16 x i8> %result
}
; Basic VLLEZH case: load an i16 into element 3 of an all-zero v8i16.
define <8 x i16> @f5(i16 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vllezh %v24, 0(%r2)
; CHECK: br %r14
  %loaded = load i16, i16 *%ptr
  %result = insertelement <8 x i16> zeroinitializer, i16 %loaded, i32 3
  ret <8 x i16> %result
}
; Basic VLLEZF case: load an i32 into element 1 of an all-zero v4i32.
define <4 x i32> @f6(i32 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vllezf %v24, 0(%r2)
; CHECK: br %r14
  %loaded = load i32, i32 *%ptr
  %result = insertelement <4 x i32> zeroinitializer, i32 %loaded, i32 1
  ret <4 x i32> %result
}
; Basic VLLEZG case: load an i64 into element 0 of an all-zero v2i64.
define <2 x i64> @f7(i64 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vllezg %v24, 0(%r2)
; CHECK: br %r14
  %loaded = load i64, i64 *%ptr
  %result = insertelement <2 x i64> zeroinitializer, i64 %loaded, i32 0
  ret <2 x i64> %result
}

View File

@ -0,0 +1,39 @@
; Test vector multiplication.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Element-wise multiplication of two v16i8 values (%dummy is unused).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmlb %v24, %v26, %v28
; CHECK: br %r14
  %product = mul <16 x i8> %val1, %val2
  ret <16 x i8> %product
}
; Element-wise multiplication of two v8i16 values (%dummy is unused).
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmlhw %v24, %v26, %v28
; CHECK: br %r14
  %product = mul <8 x i16> %val1, %val2
  ret <8 x i16> %product
}
; Element-wise multiplication of two v4i32 values (%dummy is unused).
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmlf %v24, %v26, %v28
; CHECK: br %r14
  %product = mul <4 x i32> %val1, %val2
  ret <4 x i32> %product
}
; Element-wise multiplication of two v2i64 values.  No vector instruction
; exists for this element size, so no VMLG may be emitted.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK-NOT: vmlg
; CHECK: br %r14
  %product = mul <2 x i64> %val1, %val2
  ret <2 x i64> %product
}

View File

@ -0,0 +1,36 @@
; Test vector multiply-and-add.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; v16i8 multiply followed by add: %val1 * %val2 + %val3 should become a
; single VMALB.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2,
                     <16 x i8> %val3) {
; CHECK-LABEL: f1:
; CHECK: vmalb %v24, %v26, %v28, %v30
; CHECK: br %r14
  %product = mul <16 x i8> %val1, %val2
  %sum = add <16 x i8> %product, %val3
  ret <16 x i8> %sum
}
; v8i16 multiply followed by add: %val1 * %val2 + %val3 should become a
; single VMALHW.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2,
                     <8 x i16> %val3) {
; CHECK-LABEL: f2:
; CHECK: vmalhw %v24, %v26, %v28, %v30
; CHECK: br %r14
  %product = mul <8 x i16> %val1, %val2
  %sum = add <8 x i16> %product, %val3
  ret <8 x i16> %sum
}
; v4i32 multiply followed by add: %val1 * %val2 + %val3 should become a
; single VMALF.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2,
                     <4 x i32> %val3) {
; CHECK-LABEL: f3:
; CHECK: vmalf %v24, %v26, %v28, %v30
; CHECK: br %r14
  %product = mul <4 x i32> %val1, %val2
  %sum = add <4 x i32> %product, %val3
  ret <4 x i32> %sum
}

View File

@ -0,0 +1,39 @@
; Test vector negation.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Negate a v16i8, written as a subtraction from zero (%dummy is unused).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vlcb %v24, %v26
; CHECK: br %r14
  %negated = sub <16 x i8> zeroinitializer, %val
  ret <16 x i8> %negated
}
; Negate a v8i16, written as a subtraction from zero (%dummy is unused).
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f2:
; CHECK: vlch %v24, %v26
; CHECK: br %r14
  %negated = sub <8 x i16> zeroinitializer, %val
  ret <8 x i16> %negated
}
; Negate a v4i32, written as a subtraction from zero (%dummy is unused).
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f3:
; CHECK: vlcf %v24, %v26
; CHECK: br %r14
  %negated = sub <4 x i32> zeroinitializer, %val
  ret <4 x i32> %negated
}
; Negate a v2i64, written as a subtraction from zero (%dummy is unused).
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f4:
; CHECK: vlcg %v24, %v26
; CHECK: br %r14
  %negated = sub <2 x i64> zeroinitializer, %val
  ret <2 x i64> %negated
}

View File

@ -0,0 +1,39 @@
; Test vector OR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Bitwise OR of two v16i8 values (%dummy is unused).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vo %v24, %v26, %v28
; CHECK: br %r14
  %merged = or <16 x i8> %val1, %val2
  ret <16 x i8> %merged
}
; Bitwise OR of two v8i16 values (%dummy is unused).
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vo %v24, %v26, %v28
; CHECK: br %r14
  %merged = or <8 x i16> %val1, %val2
  ret <8 x i16> %merged
}
; Bitwise OR of two v4i32 values (%dummy is unused).
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vo %v24, %v26, %v28
; CHECK: br %r14
  %merged = or <4 x i32> %val1, %val2
  ret <4 x i32> %merged
}
; Bitwise OR of two v2i64 values (%dummy is unused).
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vo %v24, %v26, %v28
; CHECK: br %r14
  %merged = or <2 x i64> %val1, %val2
  ret <2 x i64> %merged
}

View File

@ -0,0 +1,107 @@
; Test vector (or (and X, Z), (and Y, (not Z))) patterns.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; v16i8 select pattern: (%val1 & %val3) | (%val2 & ~%val3) should become a
; single VSEL.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
; CHECK-LABEL: f1:
; CHECK: vsel %v24, %v24, %v26, %v28
; CHECK: br %r14
  %inv = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1>
  %lhs = and <16 x i8> %val1, %val3
  %rhs = and <16 x i8> %val2, %inv
  %result = or <16 x i8> %lhs, %rhs
  ret <16 x i8> %result
}
; v16i8 select pattern with the inverted mask on the other AND:
; (%val1 & ~%val3) | (%val2 & %val3) should become a single VSEL.
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
; CHECK-LABEL: f2:
; CHECK: vsel %v24, %v26, %v24, %v28
; CHECK: br %r14
  %inv = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1,
                               i8 -1, i8 -1, i8 -1, i8 -1>
  %lhs = and <16 x i8> %val1, %inv
  %rhs = and <16 x i8> %val2, %val3
  %result = or <16 x i8> %lhs, %rhs
  ret <16 x i8> %result
}
; v8i16 select pattern: (%val1 & %val3) | (%val2 & ~%val3) should become a
; single VSEL.
define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
; CHECK-LABEL: f3:
; CHECK: vsel %v24, %v24, %v26, %v28
; CHECK: br %r14
  %inv = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
                               i16 -1, i16 -1, i16 -1, i16 -1>
  %lhs = and <8 x i16> %val1, %val3
  %rhs = and <8 x i16> %val2, %inv
  %result = or <8 x i16> %lhs, %rhs
  ret <8 x i16> %result
}
; v8i16 select pattern with the inverted mask on the other AND:
; (%val1 & ~%val3) | (%val2 & %val3) should become a single VSEL.
define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
; CHECK-LABEL: f4:
; CHECK: vsel %v24, %v26, %v24, %v28
; CHECK: br %r14
  %inv = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
                               i16 -1, i16 -1, i16 -1, i16 -1>
  %lhs = and <8 x i16> %val1, %inv
  %rhs = and <8 x i16> %val2, %val3
  %result = or <8 x i16> %lhs, %rhs
  ret <8 x i16> %result
}
; v4i32 select pattern: (%val1 & %val3) | (%val2 & ~%val3) should become a
; single VSEL.
define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
; CHECK-LABEL: f5:
; CHECK: vsel %v24, %v24, %v26, %v28
; CHECK: br %r14
  %inv = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
  %lhs = and <4 x i32> %val1, %val3
  %rhs = and <4 x i32> %val2, %inv
  %result = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %result
}
; v4i32 select pattern with the inverted mask on the other AND:
; (%val1 & ~%val3) | (%val2 & %val3) should become a single VSEL.
define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
; CHECK-LABEL: f6:
; CHECK: vsel %v24, %v26, %v24, %v28
; CHECK: br %r14
  %inv = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
  %lhs = and <4 x i32> %val1, %inv
  %rhs = and <4 x i32> %val2, %val3
  %result = or <4 x i32> %lhs, %rhs
  ret <4 x i32> %result
}
; v2i64 select pattern: (%val1 & %val3) | (%val2 & ~%val3) should become a
; single VSEL.
define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
; CHECK-LABEL: f7:
; CHECK: vsel %v24, %v24, %v26, %v28
; CHECK: br %r14
  %inv = xor <2 x i64> %val3, <i64 -1, i64 -1>
  %lhs = and <2 x i64> %val1, %val3
  %rhs = and <2 x i64> %val2, %inv
  %result = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %result
}
; v2i64 select pattern with the inverted mask on the other AND:
; (%val1 & ~%val3) | (%val2 & %val3) should become a single VSEL.
define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
; CHECK-LABEL: f8:
; CHECK: vsel %v24, %v26, %v24, %v28
; CHECK: br %r14
  %inv = xor <2 x i64> %val3, <i64 -1, i64 -1>
  %lhs = and <2 x i64> %val1, %inv
  %rhs = and <2 x i64> %val2, %val3
  %result = or <2 x i64> %lhs, %rhs
  ret <2 x i64> %result
}

View File

@ -0,0 +1,124 @@
; Test vector splat.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 splat of the first element.
; The zeroinitializer mask selects element 0 of %val for every lane, so a
; single VREPB with element index 0 is expected.
define <16 x i8> @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: vrepb %v24, %v24, 0
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}
; Test v16i8 splat of the last element.
; Every mask entry is 15, the highest byte element of %val, so VREPB with
; element index 15 is expected.
define <16 x i8> @f2(<16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vrepb %v24, %v24, 15
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> <i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15>
ret <16 x i8> %ret
}
; Test v16i8 splat of an arbitrary element, using the second operand of
; the shufflevector.
; Mask indices 16-31 refer to the second operand, so index 20 names element
; 4 of %val; the expected VREPB therefore uses element index 4.
define <16 x i8> @f3(<16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: vrepb %v24, %v24, 4
; CHECK: br %r14
%ret = shufflevector <16 x i8> undef, <16 x i8> %val,
<16 x i32> <i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20>
ret <16 x i8> %ret
}
; Test v8i16 splat of the first element.
; The zeroinitializer mask selects element 0 of %val for every lane, so a
; single VREPH with element index 0 is expected.
define <8 x i16> @f4(<8 x i16> %val) {
; CHECK-LABEL: f4:
; CHECK: vreph %v24, %v24, 0
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test v8i16 splat of the last element.
; Every mask entry is 7, the highest halfword element of %val, so VREPH
; with element index 7 is expected.
define <8 x i16> @f5(<8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: vreph %v24, %v24, 7
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> <i32 7, i32 7, i32 7, i32 7,
i32 7, i32 7, i32 7, i32 7>
ret <8 x i16> %ret
}
; Test v8i16 splat of an arbitrary element, using the second operand of
; the shufflevector.
; Mask indices 8-15 refer to the second operand, so index 10 names element
; 2 of %val; the expected VREPH therefore uses element index 2.
define <8 x i16> @f6(<8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: vreph %v24, %v24, 2
; CHECK: br %r14
%ret = shufflevector <8 x i16> undef, <8 x i16> %val,
<8 x i32> <i32 10, i32 10, i32 10, i32 10,
i32 10, i32 10, i32 10, i32 10>
ret <8 x i16> %ret
}
; Test v4i32 splat of the first element.
; The zeroinitializer mask selects element 0 of %val for every lane, so a
; single VREPF with element index 0 is expected.
define <4 x i32> @f7(<4 x i32> %val) {
; CHECK-LABEL: f7:
; CHECK: vrepf %v24, %v24, 0
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test v4i32 splat of the last element.
; Every mask entry is 3, the highest word element of %val, so VREPF with
; element index 3 is expected.
define <4 x i32> @f8(<4 x i32> %val) {
; CHECK-LABEL: f8:
; CHECK: vrepf %v24, %v24, 3
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %ret
}
; Test v4i32 splat of an arbitrary element, using the second operand of
; the shufflevector.
; Mask indices 4-7 refer to the second operand, so index 5 names element 1
; of %val; the expected VREPF therefore uses element index 1.
define <4 x i32> @f9(<4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: vrepf %v24, %v24, 1
; CHECK: br %r14
%ret = shufflevector <4 x i32> undef, <4 x i32> %val,
<4 x i32> <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %ret
}
; Test v2i64 splat of the first element.
; The zeroinitializer mask selects element 0 of %val for both lanes, so a
; single VREPG with element index 0 is expected.
define <2 x i64> @f10(<2 x i64> %val) {
; CHECK-LABEL: f10:
; CHECK: vrepg %v24, %v24, 0
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test v2i64 splat of the last element.
; Both mask entries are 1, the second doubleword of %val, so VREPG with
; element index 1 is expected.
define <2 x i64> @f11(<2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: vrepg %v24, %v24, 1
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> <i32 1, i32 1>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,144 @@
; Test replications of a scalar register value, represented as splats.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test v16i8 splat of the first element.
; The scalar is inserted into element 0 and that element is splatted.  The
; expected code first moves %r2 into a scratch vector register with VLVGP
; and then replicates byte element 7 of it with VREPB.
; NOTE(review): index 7 is presumably the low-order byte of the first
; doubleword, where the i8 held in the GPR ends up -- confirm against the ISA.
define <16 x i8> @f1(i8 %scalar) {
; CHECK-LABEL: f1:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepb %v24, [[REG]], 7
; CHECK: br %r14
%val = insertelement <16 x i8> undef, i8 %scalar, i32 0
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}
; Test v16i8 splat of the last element.
; The scalar is inserted into element 15 and the mask splats that same
; element, so every lane holds %scalar.  The expected code is VLVGP from
; %r2 followed by VREPB of byte element 7, independent of the insert index.
define <16 x i8> @f2(i8 %scalar) {
; CHECK-LABEL: f2:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepb %v24, [[REG]], 7
; CHECK: br %r14
%val = insertelement <16 x i8> undef, i8 %scalar, i32 15
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> <i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15,
i32 15, i32 15, i32 15, i32 15>
ret <16 x i8> %ret
}
; Test v16i8 splat of an arbitrary element, using the second operand of
; the shufflevector.
; The scalar is inserted into element 4 of %val, and mask index 20 names
; element 4 of the second operand, so every lane holds %scalar.  The
; expected code is VLVGP from %r2 followed by VREPB of byte element 7.
define <16 x i8> @f3(i8 %scalar) {
; CHECK-LABEL: f3:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepb %v24, [[REG]], 7
; CHECK: br %r14
%val = insertelement <16 x i8> undef, i8 %scalar, i32 4
%ret = shufflevector <16 x i8> undef, <16 x i8> %val,
<16 x i32> <i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20,
i32 20, i32 20, i32 20, i32 20>
ret <16 x i8> %ret
}
; Test v8i16 splat of the first element.
; The scalar is inserted into element 0 and that element is splatted.  The
; expected code first moves %r2 into a scratch vector register with VLVGP
; and then replicates halfword element 3 of it with VREPH.
; NOTE(review): index 3 is presumably the low-order halfword of the first
; doubleword, where the i16 held in the GPR ends up -- confirm against the ISA.
define <8 x i16> @f4(i16 %scalar) {
; CHECK-LABEL: f4:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vreph %v24, [[REG]], 3
; CHECK: br %r14
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test v8i16 splat of the last element.
; The scalar is inserted into element 7 and the mask splats that same
; element, so every lane holds %scalar.  The expected code is VLVGP from
; %r2 followed by VREPH of halfword element 3, independent of the insert
; index.
define <8 x i16> @f5(i16 %scalar) {
; CHECK-LABEL: f5:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vreph %v24, [[REG]], 3
; CHECK: br %r14
%val = insertelement <8 x i16> undef, i16 %scalar, i32 7
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> <i32 7, i32 7, i32 7, i32 7,
i32 7, i32 7, i32 7, i32 7>
ret <8 x i16> %ret
}
; Test v8i16 splat of an arbitrary element, using the second operand of
; the shufflevector.
; The scalar is inserted into element 2 of %val, and mask index 10 names
; element 2 of the second operand, so every lane holds %scalar.  The
; expected code is VLVGP from %r2 followed by VREPH of halfword element 3.
define <8 x i16> @f6(i16 %scalar) {
; CHECK-LABEL: f6:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vreph %v24, [[REG]], 3
; CHECK: br %r14
%val = insertelement <8 x i16> undef, i16 %scalar, i32 2
%ret = shufflevector <8 x i16> undef, <8 x i16> %val,
<8 x i32> <i32 10, i32 10, i32 10, i32 10,
i32 10, i32 10, i32 10, i32 10>
ret <8 x i16> %ret
}
; Test v4i32 splat of the first element.
; The scalar is inserted into element 0 and that element is splatted.  The
; expected code first moves %r2 into a scratch vector register with VLVGP
; and then replicates word element 1 of it with VREPF.
; NOTE(review): index 1 is presumably the low-order word of the first
; doubleword, where the i32 held in the GPR ends up -- confirm against the ISA.
define <4 x i32> @f7(i32 %scalar) {
; CHECK-LABEL: f7:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepf %v24, [[REG]], 1
; CHECK: br %r14
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test v4i32 splat of the last element.
; The scalar is inserted into element 3 and the mask splats that same
; element, so every lane holds %scalar.  The expected code is VLVGP from
; %r2 followed by VREPF of word element 1, independent of the insert index.
define <4 x i32> @f8(i32 %scalar) {
; CHECK-LABEL: f8:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepf %v24, [[REG]], 1
; CHECK: br %r14
%val = insertelement <4 x i32> undef, i32 %scalar, i32 3
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %ret
}
; Test v4i32 splat of an arbitrary element, using the second operand of
; the shufflevector.
; The scalar is inserted into element 1 of %val, and mask index 5 names
; element 1 of the second operand, so every lane holds %scalar.  The
; expected code is VLVGP from %r2 followed by VREPF of word element 1.
define <4 x i32> @f9(i32 %scalar) {
; CHECK-LABEL: f9:
; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
; CHECK: vrepf %v24, [[REG]], 1
; CHECK: br %r14
%val = insertelement <4 x i32> undef, i32 %scalar, i32 1
%ret = shufflevector <4 x i32> undef, <4 x i32> %val,
<4 x i32> <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %ret
}
; Test v2i64 splat of the first element.
; For i64 the expected code is a single VLVGP that names %r2 for both
; source operands and writes the result register %v24 directly; no separate
; replicate instruction is checked for.
define <2 x i64> @f10(i64 %scalar) {
; CHECK-LABEL: f10:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK: br %r14
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test v2i64 splat of the last element.
; The scalar is inserted into element 1 and the mask splats that same
; element.  The expected code is a single VLVGP that names %r2 for both
; source operands and writes %v24 directly.
define <2 x i64> @f11(i64 %scalar) {
; CHECK-LABEL: f11:
; CHECK: vlvgp %v24, %r2, %r2
; CHECK: br %r14
%val = insertelement <2 x i64> undef, i64 %scalar, i32 1
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> <i32 1, i32 1>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,173 @@
; Test replications of a scalar memory value, represented as splats.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 replicating load with no offset.
; The byte loaded from %ptr is inserted into element 0 and splatted; the
; whole load/insert/splat sequence is expected to become one VLREPB with a
; zero displacement from %r2.
define <16 x i8> @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlrepb %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i8, i8 *%ptr
%val = insertelement <16 x i8> undef, i8 %scalar, i32 0
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}
; Test a v16i8 replicating load with the maximum in-range offset.
; The address is %base plus 4095 bytes; that offset is expected to be used
; directly as the VLREPB displacement.
define <16 x i8> @f2(i8 *%base) {
; CHECK-LABEL: f2:
; CHECK: vlrepb %v24, 4095(%r2)
; CHECK: br %r14
%ptr = getelementptr i8, i8 *%base, i64 4095
%scalar = load i8, i8 *%ptr
%val = insertelement <16 x i8> undef, i8 %scalar, i32 0
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}
; Test a v16i8 replicating load with the first out-of-range offset.
; The address is %base plus 4096 bytes.  The offset is expected to be added
; to %r2 with a separate AGHI, after which VLREPB uses a zero displacement.
define <16 x i8> @f3(i8 *%base) {
; CHECK-LABEL: f3:
; CHECK: aghi %r2, 4096
; CHECK: vlrepb %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i8, i8 *%base, i64 4096
%scalar = load i8, i8 *%ptr
%val = insertelement <16 x i8> undef, i8 %scalar, i32 0
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}
; Test a v8i16 replicating load with no offset.
; The halfword loaded from %ptr is inserted into element 0 and splatted;
; the whole sequence is expected to become one VLREPH with a zero
; displacement from %r2.
define <8 x i16> @f4(i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlreph %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i16, i16 *%ptr
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v8i16 replicating load with the maximum in-range offset.
; Element index 2047 of an i16 array is byte offset 4094, which is expected
; to be used directly as the VLREPH displacement.
define <8 x i16> @f5(i16 *%base) {
; CHECK-LABEL: f5:
; CHECK: vlreph %v24, 4094(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2047
%scalar = load i16, i16 *%ptr
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v8i16 replicating load with the first out-of-range offset.
; Element index 2048 of an i16 array is byte offset 4096.  The offset is
; expected to be added to %r2 with a separate AGHI, after which VLREPH uses
; a zero displacement.
define <8 x i16> @f6(i16 *%base) {
; CHECK-LABEL: f6:
; CHECK: aghi %r2, 4096
; CHECK: vlreph %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2048
%scalar = load i16, i16 *%ptr
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v4i32 replicating load with no offset.
; The word loaded from %ptr is inserted into element 0 and splatted; the
; whole sequence is expected to become one VLREPF with a zero displacement
; from %r2.
define <4 x i32> @f7(i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vlrepf %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i32, i32 *%ptr
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v4i32 replicating load with the maximum in-range offset.
; Element index 1023 of an i32 array is byte offset 4092, which is expected
; to be used directly as the VLREPF displacement.
define <4 x i32> @f8(i32 *%base) {
; CHECK-LABEL: f8:
; CHECK: vlrepf %v24, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i64 1023
%scalar = load i32, i32 *%ptr
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v4i32 replicating load with the first out-of-range offset.
; Element index 1024 of an i32 array is byte offset 4096.  The offset is
; expected to be added to %r2 with a separate AGHI, after which VLREPF uses
; a zero displacement.
define <4 x i32> @f9(i32 *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vlrepf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i64 1024
%scalar = load i32, i32 *%ptr
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v2i64 replicating load with no offset.
; The doubleword loaded from %ptr is inserted into element 0 and splatted;
; the whole sequence is expected to become one VLREPG with a zero
; displacement from %r2.
define <2 x i64> @f10(i64 *%ptr) {
; CHECK-LABEL: f10:
; CHECK: vlrepg %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i64, i64 *%ptr
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v2i64 replicating load with the maximum in-range offset.
; Element index 511 of an i64 array is byte offset 4088, which is expected
; to be used directly as the VLREPG displacement.  The index is typed i64,
; matching the i64 indices used by the byte, halfword and word tests in
; this file; a constant index addresses the same byte either way.
define <2 x i64> @f11(i64 *%base) {
; CHECK-LABEL: f11:
; CHECK: vlrepg %v24, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i64 511
%scalar = load i64, i64 *%ptr
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v2i64 replicating load with the first out-of-range offset.
; Element index 512 of an i64 array is byte offset 4096.  The offset is
; expected to be added to %r2 with a separate AGHI, after which VLREPG uses
; a zero displacement.  The index is typed i64, matching the i64 indices
; used by the byte, halfword and word tests in this file; a constant index
; addresses the same byte either way.
define <2 x i64> @f12(i64 *%base) {
; CHECK-LABEL: f12:
; CHECK: aghi %r2, 4096
; CHECK: vlrepg %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i64 512
%scalar = load i64, i64 *%ptr
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v16i8 replicating load with an index.
; The address is %base + %index + 1023.  All three parts are expected to
; fold into the VLREPB address: displacement 1023, with %r3 and %r2 as the
; two address registers.
define <16 x i8> @f19(i8 *%base, i64 %index) {
; CHECK-LABEL: f19:
; CHECK: vlrepb %v24, 1023(%r3,%r2)
; CHECK: br %r14
%ptr1 = getelementptr i8, i8 *%base, i64 %index
%ptr = getelementptr i8, i8 *%ptr1, i64 1023
%scalar = load i8, i8 *%ptr
%val = insertelement <16 x i8> undef, i8 %scalar, i32 0
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> zeroinitializer
ret <16 x i8> %ret
}

View File

@ -0,0 +1,160 @@
; Test vector merge high.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a canonical v16i8 merge high.
; The mask interleaves elements 0-7 of %val1 (indices 0-7) with elements
; 0-7 of %val2 (indices 16-23), %val1 first; VMRHB with the operands in
; argument order is expected.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmrhb %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 0, i32 16, i32 1, i32 17,
i32 2, i32 18, i32 3, i32 19,
i32 4, i32 20, i32 5, i32 21,
i32 6, i32 22, i32 7, i32 23>
ret <16 x i8> %ret
}
; Test a reversed v16i8 merge high.
; The mask interleaves elements 0-7 of both inputs but takes the %val2
; element (indices 16-23) first in each pair, so VMRHB is expected with
; its source operands swapped (%v26, %v24).
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmrhb %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 16, i32 0, i32 17, i32 1,
i32 18, i32 2, i32 19, i32 3,
i32 20, i32 4, i32 21, i32 5,
i32 22, i32 6, i32 23, i32 7>
ret <16 x i8> %ret
}
; Test a v16i8 merge high with only the first operand being used.
; Each of elements 0-7 of %val1 appears twice in a row and %val2 is never
; referenced, so VMRHB is expected with %v24 as both source operands.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmrhb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 0, i32 0, i32 1, i32 1,
i32 2, i32 2, i32 3, i32 3,
i32 4, i32 4, i32 5, i32 5,
i32 6, i32 6, i32 7, i32 7>
ret <16 x i8> %ret
}
; Test a v16i8 merge high with only the second operand being used.
; This is converted into @f3 by target-independent code.
; Indices 16-23 name elements 0-7 of %val2, each used twice in a row, so
; VMRHB is expected with %v26 as both source operands.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmrhb %v24, %v26, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 16, i32 16, i32 17, i32 17,
i32 18, i32 18, i32 19, i32 19,
i32 20, i32 20, i32 21, i32 21,
i32 22, i32 22, i32 23, i32 23>
ret <16 x i8> %ret
}
; Test a v16i8 merge with both operands being the same.  This too is
; converted into @f3 by target-independent code.
; Because both shuffle inputs are %val, index i and index i+16 name the
; same element.  Reduced modulo 16 the mask reads 0,0,1,1,...,7,7, so VMRHB
; with %v24 as both source operands is expected.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vmrhb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> %val,
<16 x i32> <i32 0, i32 16, i32 17, i32 17,
i32 18, i32 2, i32 3, i32 3,
i32 20, i32 20, i32 5, i32 5,
i32 6, i32 22, i32 23, i32 7>
ret <16 x i8> %ret
}
; Test a v16i8 merge in which some of the indices are don't care.
; Every defined mask entry equals the entry at the same position in the
; canonical merge-high mask 0,16,1,17,...,7,23; the undef entries may take
; any value, so a plain VMRHB of %v24 and %v26 is still expected.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmrhb %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 0, i32 undef, i32 1, i32 17,
i32 undef, i32 18, i32 undef, i32 undef,
i32 undef, i32 20, i32 5, i32 21,
i32 undef, i32 22, i32 7, i32 undef>
ret <16 x i8> %ret
}
; Test a v16i8 merge in which one of the operands is undefined and where
; indices for that operand are "don't care".  Target-independent code
; converts the indices themselves into "undef"s.
; Indices below 16 refer to the undef first operand.  The remaining entries
; (16-23) sit at positions consistent with merging elements 0-7 of %val
; with themselves, so VMRHB with %v24 as both source operands is expected.
define <16 x i8> @f7(<16 x i8> %val) {
; CHECK-LABEL: f7:
; CHECK: vmrhb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> undef, <16 x i8> %val,
<16 x i32> <i32 11, i32 16, i32 17, i32 5,
i32 18, i32 10, i32 19, i32 19,
i32 20, i32 20, i32 21, i32 3,
i32 2, i32 22, i32 9, i32 23>
ret <16 x i8> %ret
}
; Test a canonical v8i16 merge high.
; The mask interleaves elements 0-3 of %val1 (indices 0-3) with elements
; 0-3 of %val2 (indices 8-11), %val1 first; VMRHH with the operands in
; argument order is expected.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmrhh %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 0, i32 8, i32 1, i32 9,
i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %ret
}
; Test a reversed v8i16 merge high.
; The %val2 element (indices 8-11) comes first in each pair, so VMRHH is
; expected with its source operands swapped (%v26, %v24).
define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f9:
; CHECK: vmrhh %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 8, i32 0, i32 9, i32 1,
i32 10, i32 2, i32 11, i32 3>
ret <8 x i16> %ret
}
; Test a canonical v4i32 merge high.
; The mask interleaves elements 0-1 of %val1 (indices 0-1) with elements
; 0-1 of %val2 (indices 4-5), %val1 first; VMRHF with the operands in
; argument order is expected.
define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f10:
; CHECK: vmrhf %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x i32> %ret
}
; Test a reversed v4i32 merge high.
; The %val2 element (indices 4-5) comes first in each pair, so VMRHF is
; expected with its source operands swapped (%v26, %v24).
define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f11:
; CHECK: vmrhf %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 4, i32 0, i32 5, i32 1>
ret <4 x i32> %ret
}
; Test a canonical v2i64 merge high.
; The mask takes element 0 of %val1 (index 0) followed by element 0 of
; %val2 (index 2); VMRHG with the operands in argument order is expected.
define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f12:
; CHECK: vmrhg %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 0, i32 2>
ret <2 x i64> %ret
}
; Test a reversed v2i64 merge high.
; The mask takes element 0 of %val2 (index 2) followed by element 0 of
; %val1 (index 0), so VMRHG is expected with its source operands swapped
; (%v26, %v24).
define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f13:
; CHECK: vmrhg %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 2, i32 0>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,160 @@
; Test vector merge low.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a canonical v16i8 merge low.
; The mask interleaves elements 8-15 of %val1 (indices 8-15) with elements
; 8-15 of %val2 (indices 24-31), %val1 first; VMRLB with the operands in
; argument order is expected.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vmrlb %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 8, i32 24, i32 9, i32 25,
i32 10, i32 26, i32 11, i32 27,
i32 12, i32 28, i32 13, i32 29,
i32 14, i32 30, i32 15, i32 31>
ret <16 x i8> %ret
}
; Test a reversed v16i8 merge low.
; The mask interleaves elements 8-15 of both inputs but takes the %val2
; element (indices 24-31) first in each pair, so VMRLB is expected with
; its source operands swapped (%v26, %v24).
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vmrlb %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 24, i32 8, i32 25, i32 9,
i32 26, i32 10, i32 27, i32 11,
i32 28, i32 12, i32 29, i32 13,
i32 30, i32 14, i32 31, i32 15>
ret <16 x i8> %ret
}
; Test a v16i8 merge low with only the first operand being used.
; Each of elements 8-15 of %val1 appears twice in a row and %val2 is never
; referenced, so VMRLB is expected with %v24 as both source operands.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vmrlb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 8, i32 8, i32 9, i32 9,
i32 10, i32 10, i32 11, i32 11,
i32 12, i32 12, i32 13, i32 13,
i32 14, i32 14, i32 15, i32 15>
ret <16 x i8> %ret
}
; Test a v16i8 merge low with only the second operand being used.
; This is converted into @f3 by target-independent code.
; Indices 24-31 name elements 8-15 of %val2, each used twice in a row, so
; VMRLB is expected with %v26 as both source operands.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vmrlb %v24, %v26, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 24, i32 24, i32 25, i32 25,
i32 26, i32 26, i32 27, i32 27,
i32 28, i32 28, i32 29, i32 29,
i32 30, i32 30, i32 31, i32 31>
ret <16 x i8> %ret
}
; Test a v16i8 merge with both operands being the same.  This too is
; converted into @f3 by target-independent code.
; Because both shuffle inputs are %val, index i and index i+16 name the
; same element.  Reduced modulo 16 the mask reads 8,8,9,9,...,15,15, so
; VMRLB with %v24 as both source operands is expected.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vmrlb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> %val,
<16 x i32> <i32 8, i32 24, i32 25, i32 25,
i32 26, i32 10, i32 11, i32 11,
i32 28, i32 28, i32 13, i32 13,
i32 14, i32 30, i32 31, i32 15>
ret <16 x i8> %ret
}
; Test a v16i8 merge in which some of the indices are don't care.
; Every defined mask entry equals the entry at the same position in the
; canonical merge-low mask 8,24,9,25,...,15,31; the undef entries may take
; any value, so a plain VMRLB of %v24 and %v26 is still expected.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vmrlb %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 8, i32 undef, i32 9, i32 25,
i32 undef, i32 26, i32 undef, i32 undef,
i32 undef, i32 28, i32 13, i32 29,
i32 undef, i32 30, i32 15, i32 undef>
ret <16 x i8> %ret
}
; Test a v16i8 merge in which one of the operands is undefined and where
; indices for that operand are "don't care".  Target-independent code
; converts the indices themselves into "undef"s.
; Indices below 16 refer to the undef first operand.  The remaining entries
; (24-31) sit at positions consistent with merging elements 8-15 of %val
; with themselves, so VMRLB with %v24 as both source operands is expected.
define <16 x i8> @f7(<16 x i8> %val) {
; CHECK-LABEL: f7:
; CHECK: vmrlb %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> undef, <16 x i8> %val,
<16 x i32> <i32 11, i32 24, i32 25, i32 5,
i32 26, i32 10, i32 27, i32 27,
i32 28, i32 28, i32 29, i32 3,
i32 2, i32 30, i32 9, i32 31>
ret <16 x i8> %ret
}
; Test a canonical v8i16 merge low.
; The mask interleaves elements 4-7 of %val1 (indices 4-7) with elements
; 4-7 of %val2 (indices 12-15), %val1 first; VMRLH with the operands in
; argument order is expected.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vmrlh %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 4, i32 12, i32 5, i32 13,
i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %ret
}
; Test a reversed v8i16 merge low.
; The %val2 element (indices 12-15) comes first in each pair, so VMRLH is
; expected with its source operands swapped (%v26, %v24).
define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f9:
; CHECK: vmrlh %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 12, i32 4, i32 13, i32 5,
i32 14, i32 6, i32 15, i32 7>
ret <8 x i16> %ret
}
; Test a canonical v4i32 merge low.
; The mask interleaves elements 2-3 of %val1 (indices 2-3) with elements
; 2-3 of %val2 (indices 6-7), %val1 first; VMRLF with the operands in
; argument order is expected.
define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f10:
; CHECK: vmrlf %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 2, i32 6, i32 3, i32 7>
ret <4 x i32> %ret
}
; Test a reversed v4i32 merge low.
; The %val2 element (indices 6-7) comes first in each pair, so VMRLF is
; expected with its source operands swapped (%v26, %v24).
define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f11:
; CHECK: vmrlf %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 6, i32 2, i32 7, i32 3>
ret <4 x i32> %ret
}
; Test a canonical v2i64 merge low.
; The mask takes element 1 of %val1 (index 1) followed by element 1 of
; %val2 (index 3); VMRLG with the operands in argument order is expected.
define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f12:
; CHECK: vmrlg %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 1, i32 3>
ret <2 x i64> %ret
}
; Test a reversed v2i64 merge low.
; The mask takes element 1 of %val2 (index 3) followed by element 1 of
; %val1 (index 1), so VMRLG is expected with its source operands swapped
; (%v26, %v24).
define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f13:
; CHECK: vmrlg %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 3, i32 1>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,140 @@
; Test vector pack.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a canonical v16i8 pack.
; The mask takes the odd-numbered byte elements of %val1 (1,3,...,15)
; followed by the odd-numbered byte elements of %val2 (17,19,...,31);
; VPKH with the operands in argument order is expected.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vpkh %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15,
i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31>
ret <16 x i8> %ret
}
; Test a reversed v16i8 pack.
; The odd-numbered byte elements of %val2 (17-31) come first, followed by
; those of %val1 (1-15), so VPKH is expected with its source operands
; swapped (%v26, %v24).
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vpkh %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31,
i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15>
ret <16 x i8> %ret
}
; Test a v16i8 pack with only the first operand being used.
; The odd-numbered byte elements of %val1 fill both halves of the result
; and %val2 is never referenced, so VPKH is expected with %v24 as both
; source operands.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vpkh %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15,
i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15>
ret <16 x i8> %ret
}
; Test a v16i8 pack with only the second operand being used.
; This is converted into @f3 by target-independent code.
; Indices 17-31 name the odd-numbered byte elements of %val2, which fill
; both halves of the result, so VPKH is expected with %v26 as both source
; operands.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vpkh %v24, %v26, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31,
i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31>
ret <16 x i8> %ret
}
; Test a v16i8 pack with both operands being the same.  This too is
; converted into @f3 by target-independent code.
; The mask is the canonical pack mask, but both shuffle inputs are %val, so
; VPKH with %v24 as both source operands is expected.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vpkh %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> %val,
<16 x i32> <i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15,
i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31>
ret <16 x i8> %ret
}
; Test a v16i8 pack in which some of the indices are don't care.
; Every defined mask entry equals the entry at the same position in the
; canonical pack mask 1,3,...,31; the undef entries may take any value, so
; a plain VPKH of %v24 and %v26 is still expected.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vpkh %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 1, i32 undef, i32 5, i32 7,
i32 undef, i32 11, i32 undef, i32 undef,
i32 undef, i32 19, i32 21, i32 23,
i32 undef, i32 27, i32 29, i32 undef>
ret <16 x i8> %ret
}
; Test a v16i8 pack in which one of the operands is undefined and where
; indices for that operand are "don't care".  Target-independent code
; converts the indices themselves into "undef"s.
; The first eight mask entries are all below 16 and so refer to the undef
; first operand; the last eight are the odd-numbered byte elements of %val.
; VPKH with %v24 as both source operands is expected.
define <16 x i8> @f7(<16 x i8> %val) {
; CHECK-LABEL: f7:
; CHECK: vpkh %v24, %v24, %v24
; CHECK: br %r14
%ret = shufflevector <16 x i8> undef, <16 x i8> %val,
<16 x i32> <i32 7, i32 1, i32 9, i32 15,
i32 15, i32 3, i32 5, i32 1,
i32 17, i32 19, i32 21, i32 23,
i32 25, i32 27, i32 29, i32 31>
ret <16 x i8> %ret
}
; Test a canonical v8i16 pack.
; The mask takes the odd-numbered halfword elements of %val1 (1,3,5,7)
; followed by those of %val2 (9,11,13,15); VPKF with the operands in
; argument order is expected.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 1, i32 3, i32 5, i32 7,
i32 9, i32 11, i32 13, i32 15>
ret <8 x i16> %ret
}
; Test a reversed v8i16 pack.
; The odd-numbered halfword elements of %val2 (9-15) come first, followed
; by those of %val1 (1-7), so VPKF is expected with its source operands
; swapped (%v26, %v24).
define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f9:
; CHECK: vpkf %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 9, i32 11, i32 13, i32 15,
i32 1, i32 3, i32 5, i32 7>
ret <8 x i16> %ret
}
; Test a canonical v4i32 pack.
; The mask takes the odd-numbered word elements of %val1 (1,3) followed by
; those of %val2 (5,7); VPKG with the operands in argument order is
; expected.
define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f10:
; CHECK: vpkg %v24, %v24, %v26
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i32> %ret
}
; Test a reversed v4i32 pack.
; The odd-numbered word elements of %val2 (5,7) come first, followed by
; those of %val1 (1,3), so VPKG is expected with its source operands
; swapped (%v26, %v24).
define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f11:
; CHECK: vpkg %v24, %v26, %v24
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 5, i32 7, i32 1, i32 3>
ret <4 x i32> %ret
}

View File

@ -0,0 +1,125 @@
; Test vector shift left double immediate.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift with the lowest useful shift amount.
; The mask selects 16 consecutive bytes of the %val1:%val2 concatenation
; starting at byte 1, so VSLDB with a shift amount of 1 is expected.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vsldb %v24, %v24, %v26, 1
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 1, i32 2, i32 3, i32 4,
i32 5, i32 6, i32 7, i32 8,
i32 9, i32 10, i32 11, i32 12,
i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %ret
}
; Test a v16i8 shift with the highest shift amount.
; The mask selects 16 consecutive bytes of the %val1:%val2 concatenation
; starting at byte 15, so VSLDB with a shift amount of 15 is expected.
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vsldb %v24, %v24, %v26, 15
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 15, i32 16, i32 17, i32 18,
i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26,
i32 27, i32 28, i32 29, i32 30>
ret <16 x i8> %ret
}
; Test a v16i8 shift in which the operands need to be reversed.
; The mask starts at byte 4 of %val2 (index 20), runs to the end of %val2
; and continues with bytes 0-3 of %val1.  That is a window into the
; %val2:%val1 concatenation, so VSLDB is expected with operands %v26, %v24
; and a shift amount of 4.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vsldb %v24, %v26, %v24, 4
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27,
i32 28, i32 29, i32 30, i32 31,
i32 0, i32 1, i32 2, i32 3>
ret <16 x i8> %ret
}
; Test a v16i8 shift in which the operands need to be duplicated.
; The mask rotates the single input by 7 bytes (bytes 7-15 followed by
; bytes 0-6), so VSLDB is expected with %v24 as both source operands and a
; shift amount of 7.
define <16 x i8> @f4(<16 x i8> %val) {
; CHECK-LABEL: f4:
; CHECK: vsldb %v24, %v24, %v24, 7
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> <i32 7, i32 8, i32 9, i32 10,
i32 11, i32 12, i32 13, i32 14,
i32 15, i32 0, i32 1, i32 2,
i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %ret
}
; Test a v16i8 shift in which some of the indices are undefs.
; The defined entries are consistent with a 16-byte window starting at
; byte 11 of the %val1:%val2 concatenation (position 4 holds index 15), so
; VSLDB with a shift amount of 11 is expected.
define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f5:
; CHECK: vsldb %v24, %v24, %v26, 11
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef,
i32 15, i32 16, i32 undef, i32 18,
i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26>
ret <16 x i8> %ret
}
; ...and again with reversed operands.
; Position 2 holds index 31 (byte 15 of %val2) and is followed by bytes
; 0-12 of %val1, which is consistent with a window starting at byte 13 of
; the %val2:%val1 concatenation.  VSLDB is expected with operands %v26,
; %v24 and a shift amount of 13.
define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f6:
; CHECK: vsldb %v24, %v26, %v24, 13
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 undef, i32 undef, i32 31, i32 0,
i32 1, i32 2, i32 3, i32 4,
i32 5, i32 6, i32 7, i32 8,
i32 9, i32 10, i32 11, i32 12>
ret <16 x i8> %ret
}
; Test a v8i16 shift with the lowest useful shift amount.
; The mask selects 8 consecutive halfwords starting at element 1 of the
; %val1:%val2 concatenation.  VSLDB counts in bytes, so a shift amount of 2
; is expected.
define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f7:
; CHECK: vsldb %v24, %v24, %v26, 2
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 1, i32 2, i32 3, i32 4,
i32 5, i32 6, i32 7, i32 8>
ret <8 x i16> %ret
}
; Test a v8i16 shift with the highest useful shift amount.
; The mask selects 8 consecutive halfwords starting at element 7 of the
; %val1:%val2 concatenation.  VSLDB counts in bytes, so a shift amount of
; 14 is expected.
define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f8:
; CHECK: vsldb %v24, %v24, %v26, 14
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 7, i32 8, i32 9, i32 10,
i32 11, i32 12, i32 13, i32 14>
ret <8 x i16> %ret
}
; Test a v4i32 shift with the lowest useful shift amount.
; The mask selects 4 consecutive words starting at element 1 of the
; %val1:%val2 concatenation.  VSLDB counts in bytes, so a shift amount of 4
; is expected.
define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f9:
; CHECK: vsldb %v24, %v24, %v26, 4
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %ret
}
; Test a v4i32 shift with the highest useful shift amount.
; The mask selects 4 consecutive words starting at element 3 of the
; %val1:%val2 concatenation.  VSLDB counts in bytes, so a shift amount of
; 12 is expected.
define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f10:
; CHECK: vsldb %v24, %v24, %v26, 12
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %ret
}
; There are no v2i64 tests here because we use VPDI, not VSLDB, for v2i64
; shuffles.

View File

@ -0,0 +1,130 @@
; Test vector permutes using VPDI.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a high1/low2 permute for v16i8.
; The mask takes bytes 0-7 of %val1 followed by indices 24-31, which are
; bytes 8-15 of %val2.  The expected VPDI immediate is 1.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vpdi %v24, %v24, %v26, 1
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7,
i32 24, i32 25, i32 26, i32 27,
i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %ret
}
; Test a low2/high1 permute for v16i8.
; The mask takes indices 24-31 (bytes 8-15 of %val2) followed by bytes 0-7
; of %val1.  The expected VPDI lists %v26 before %v24 with immediate 4.
define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f2:
; CHECK: vpdi %v24, %v26, %v24, 4
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 24, i32 25, i32 26, i32 27,
i32 28, i32 29, i32 30, i32 31,
i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7>
ret <16 x i8> %ret
}
; Test a low1/high2 permute for v16i8.
; The defined mask lanes follow the pattern 8-15 (second half of %val1)
; then 16-23 (bytes 0-7 of %val2); lanes 3, 5, 10 and 15 are undef.  The
; expected VPDI immediate is 4.
define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f3:
; CHECK: vpdi %v24, %v24, %v26, 4
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 8, i32 9, i32 10, i32 undef,
i32 12, i32 undef, i32 14, i32 15,
i32 16, i32 17, i32 undef, i32 19,
i32 20, i32 21, i32 22, i32 undef>
ret <16 x i8> %ret
}
; Test a high2/low1 permute for v16i8.
; The mask takes indices 16-23 (bytes 0-7 of %val2) followed by bytes 8-15
; of %val1.  The expected VPDI lists %v26 before %v24 with immediate 1.
define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f4:
; CHECK: vpdi %v24, %v26, %v24, 1
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 16, i32 17, i32 18, i32 19,
i32 20, i32 21, i32 22, i32 23,
i32 8, i32 9, i32 10, i32 11,
i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %ret
}
; Test reversing two doublewords in a v16i8.
; Only one input is used (the second shuffle operand is undef): the mask
; takes bytes 8-15 and then bytes 0-7 of %val.  The expected VPDI uses %v24
; for both source operands with immediate 4.
define <16 x i8> @f5(<16 x i8> %val) {
; CHECK-LABEL: f5:
; CHECK: vpdi %v24, %v24, %v24, 4
; CHECK: br %r14
%ret = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> <i32 8, i32 9, i32 10, i32 11,
i32 12, i32 13, i32 14, i32 15,
i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7>
ret <16 x i8> %ret
}
; Test a high1/low2 permute for v8i16.
; The mask takes halfword elements 0-3 of %val1 followed by indices 12-15,
; which are elements 4-7 of %val2.  The expected VPDI immediate is 1.
define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f6:
; CHECK: vpdi %v24, %v24, %v26, 1
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 0, i32 1, i32 2, i32 3,
i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %ret
}
; Test a low2/high1 permute for v8i16.
; The mask takes indices 12-15 (elements 4-7 of %val2) followed by elements
; 0-3 of %val1.  The expected VPDI lists %v26 before %v24 with immediate 4.
define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f7:
; CHECK: vpdi %v24, %v26, %v24, 4
; CHECK: br %r14
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 12, i32 13, i32 14, i32 15,
i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %ret
}
; Test a high1/low2 permute for v4i32.
; The mask takes word elements 0-1 of %val1 followed by indices 6-7, which
; are elements 2-3 of %val2.  The expected VPDI immediate is 1.
define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f8:
; CHECK: vpdi %v24, %v24, %v26, 1
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x i32> %ret
}
; Test a low2/high1 permute for v4i32.
; The mask takes indices 6-7 (elements 2-3 of %val2) followed by elements
; 0-1 of %val1.  The expected VPDI lists %v26 before %v24 with immediate 4.
define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f9:
; CHECK: vpdi %v24, %v26, %v24, 4
; CHECK: br %r14
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %ret
}
; Test a high1/low2 permute for v2i64.
; The mask takes element 0 of %val1 followed by index 3, which is element 1
; of %val2.  The expected VPDI immediate is 1.
define <2 x i64> @f10(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f10:
; CHECK: vpdi %v24, %v24, %v26, 1
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 0, i32 3>
ret <2 x i64> %ret
}
; Test a low2/high1 permute for v2i64.
; The mask takes index 3 (element 1 of %val2) followed by element 0 of
; %val1.  The expected VPDI lists %v26 before %v24 with immediate 4.
define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f11:
; CHECK: vpdi %v24, %v26, %v24, 4
; CHECK: br %r14
%ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
<2 x i32> <i32 3, i32 0>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,38 @@
; Test general vector permute of a v16i8.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-CODE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; Shuffle %val1 and %val2 with an irregular byte mask.
; The code checks expect the permute mask to be addressed with LARL, loaded
; with VL and passed to VPERM as its last operand.  The vector checks expect
; the 16 emitted mask bytes to equal the shufflevector mask, lane by lane.
define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
; CHECK-CODE-LABEL: f1:
; CHECK-CODE: larl [[REG:%r[0-5]]],
; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
; CHECK-CODE: vperm %v24, %v24, %v26, [[MASK]]
; CHECK-CODE: br %r14
;
; CHECK-VECTOR: .byte 1
; CHECK-VECTOR-NEXT: .byte 19
; CHECK-VECTOR-NEXT: .byte 6
; CHECK-VECTOR-NEXT: .byte 5
; CHECK-VECTOR-NEXT: .byte 20
; CHECK-VECTOR-NEXT: .byte 22
; CHECK-VECTOR-NEXT: .byte 1
; CHECK-VECTOR-NEXT: .byte 1
; CHECK-VECTOR-NEXT: .byte 25
; CHECK-VECTOR-NEXT: .byte 29
; CHECK-VECTOR-NEXT: .byte 11
; Any byte would be OK here (mask lane 11 is undef).
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .byte 31
; CHECK-VECTOR-NEXT: .byte 4
; CHECK-VECTOR-NEXT: .byte 15
; CHECK-VECTOR-NEXT: .byte 19
%ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
<16 x i32> <i32 1, i32 19, i32 6, i32 5,
i32 20, i32 22, i32 1, i32 1,
i32 25, i32 29, i32 11, i32 undef,
i32 31, i32 4, i32 15, i32 19>
ret <16 x i8> %ret
}

View File

@ -0,0 +1,36 @@
; Test general vector permute of a v8i16.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-CODE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; Shuffle %val1 and %val2 with an irregular halfword mask.
; The code checks expect the permute mask to be addressed with LARL, loaded
; with VL and passed to VPERM as its last operand.  The expected VPERM lists
; %v26 before %v24, and the expected byte table is numbered to match: mask
; element 8 appears as bytes 0-1 and mask element 5 as bytes 26-27.
define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
; CHECK-CODE-LABEL: f1:
; CHECK-CODE: larl [[REG:%r[0-5]]],
; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
; CHECK-CODE: br %r14
;
; CHECK-VECTOR: .byte 0
; CHECK-VECTOR-NEXT: .byte 1
; CHECK-VECTOR-NEXT: .byte 26
; CHECK-VECTOR-NEXT: .byte 27
; Any 2 bytes would be OK here (mask element 2 is undef).
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .byte 28
; CHECK-VECTOR-NEXT: .byte 29
; CHECK-VECTOR-NEXT: .byte 6
; CHECK-VECTOR-NEXT: .byte 7
; CHECK-VECTOR-NEXT: .byte 14
; CHECK-VECTOR-NEXT: .byte 15
; CHECK-VECTOR-NEXT: .byte 8
; CHECK-VECTOR-NEXT: .byte 9
; CHECK-VECTOR-NEXT: .byte 16
; CHECK-VECTOR-NEXT: .byte 17
%ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
<8 x i32> <i32 8, i32 5, i32 undef, i32 6,
i32 11, i32 15, i32 12, i32 0>
ret <8 x i16> %ret
}

View File

@ -0,0 +1,35 @@
; Test general vector permute of a v4i32.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-CODE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; Shuffle %val1 and %val2 with an irregular word mask.
; The code checks expect the permute mask to be addressed with LARL, loaded
; with VL and passed to VPERM as its last operand.  The expected VPERM lists
; %v26 before %v24, and the expected byte table is numbered to match: mask
; element 5 appears as bytes 4-7 and mask element 1 as bytes 20-23.
define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
; CHECK-CODE-LABEL: f1:
; CHECK-CODE: larl [[REG:%r[0-5]]],
; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
; CHECK-CODE: br %r14
;
; CHECK-VECTOR: .byte 4
; CHECK-VECTOR-NEXT: .byte 5
; CHECK-VECTOR-NEXT: .byte 6
; CHECK-VECTOR-NEXT: .byte 7
; CHECK-VECTOR-NEXT: .byte 20
; CHECK-VECTOR-NEXT: .byte 21
; CHECK-VECTOR-NEXT: .byte 22
; CHECK-VECTOR-NEXT: .byte 23
; Any 4 bytes would be OK here (mask element 2 is undef).
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .space 1
; CHECK-VECTOR-NEXT: .byte 12
; CHECK-VECTOR-NEXT: .byte 13
; CHECK-VECTOR-NEXT: .byte 14
; CHECK-VECTOR-NEXT: .byte 15
%ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
<4 x i32> <i32 5, i32 1, i32 undef, i32 7>
ret <4 x i32> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector shift left with vector shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift.
; Each lane of %val1 is shifted left by the matching lane of %val2.
; %dummy is unused; with it as the first parameter, the expected VESLVB
; reads %v26 and %v28 and writes %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: veslvb %v24, %v26, %v28
; CHECK: br %r14
%ret = shl <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 shift.
; Each lane of %val1 is shifted left by the matching lane of %val2.
; %dummy is unused; with it as the first parameter, the expected VESLVH
; reads %v26 and %v28 and writes %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: veslvh %v24, %v26, %v28
; CHECK: br %r14
%ret = shl <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 shift.
; Each lane of %val1 is shifted left by the matching lane of %val2.
; %dummy is unused; with it as the first parameter, the expected VESLVF
; reads %v26 and %v28 and writes %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: veslvf %v24, %v26, %v28
; CHECK: br %r14
%ret = shl <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 shift.
; Each lane of %val1 is shifted left by the matching lane of %val2.
; %dummy is unused; with it as the first parameter, the expected VESLVG
; reads %v26 and %v28 and writes %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: veslvg %v24, %v26, %v28
; CHECK: br %r14
%ret = shl <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector arithmetic shift right with vector shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift.
; Each lane of %val1 is arithmetically shifted right by the matching lane
; of %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRAVB reads %v26 and %v28 and writes %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vesravb %v24, %v26, %v28
; CHECK: br %r14
%ret = ashr <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 shift.
; Each lane of %val1 is arithmetically shifted right by the matching lane
; of %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRAVH reads %v26 and %v28 and writes %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vesravh %v24, %v26, %v28
; CHECK: br %r14
%ret = ashr <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 shift.
; Each lane of %val1 is arithmetically shifted right by the matching lane
; of %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRAVF reads %v26 and %v28 and writes %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vesravf %v24, %v26, %v28
; CHECK: br %r14
%ret = ashr <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 shift.
; Each lane of %val1 is arithmetically shifted right by the matching lane
; of %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRAVG reads %v26 and %v28 and writes %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vesravg %v24, %v26, %v28
; CHECK: br %r14
%ret = ashr <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector logical shift right with vector shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift.
; Each lane of %val1 is logically shifted right by the matching lane of
; %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRLVB reads %v26 and %v28 and writes %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vesrlvb %v24, %v26, %v28
; CHECK: br %r14
%ret = lshr <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 shift.
; Each lane of %val1 is logically shifted right by the matching lane of
; %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRLVH reads %v26 and %v28 and writes %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vesrlvh %v24, %v26, %v28
; CHECK: br %r14
%ret = lshr <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 shift.
; Each lane of %val1 is logically shifted right by the matching lane of
; %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRLVF reads %v26 and %v28 and writes %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vesrlvf %v24, %v26, %v28
; CHECK: br %r14
%ret = lshr <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 shift.
; Each lane of %val1 is logically shifted right by the matching lane of
; %val2.  %dummy is unused; with it as the first parameter, the expected
; VESRLVG reads %v26 and %v28 and writes %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vesrlvg %v24, %v26, %v28
; CHECK: br %r14
%ret = lshr <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,134 @@
; Test vector shift left with scalar shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift by a variable.
; The i32 amount is truncated to i8 and splatted across all 16 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESLB takes the amount from the address operand 0(%r2).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
; CHECK-LABEL: f1:
; CHECK: veslb %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i8
%shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
%val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
<16 x i32> zeroinitializer
%ret = shl <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v16i8 shift by the lowest useful constant.
; Every lane shifts left by 1; the expected VESLB carries that amount
; directly as its last operand.
define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: veslb %v24, %v26, 1
; CHECK: br %r14
%ret = shl <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %ret
}
; Test a v16i8 shift by the highest useful constant.
; Every lane shifts left by 7, one less than the 8-bit element width; the
; expected VESLB carries that amount directly as its last operand.
define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: veslb %v24, %v26, 7
; CHECK: br %r14
%ret = shl <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %ret
}
; Test a v8i16 shift by a variable.
; The i32 amount is truncated to i16 and splatted across all 8 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESLH takes the amount from the address operand 0(%r2).
define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
; CHECK-LABEL: f4:
; CHECK: veslh %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i16
%shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
%val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
<8 x i32> zeroinitializer
%ret = shl <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v8i16 shift by the lowest useful constant.
; Every lane shifts left by 1; the expected VESLH carries that amount
; directly as its last operand.
define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: veslh %v24, %v26, 1
; CHECK: br %r14
%ret = shl <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %ret
}
; Test a v8i16 shift by the highest useful constant.
; Every lane shifts left by 15, one less than the 16-bit element width; the
; expected VESLH carries that amount directly as its last operand.
define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: veslh %v24, %v26, 15
; CHECK: br %r14
%ret = shl <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %ret
}
; Test a v4i32 shift by a variable.
; The i32 amount is splatted across all 4 lanes without conversion
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESLF takes the amount from the address operand 0(%r2).
define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
; CHECK-LABEL: f7:
; CHECK: veslf %v24, %v26, 0(%r2)
; CHECK: br %r14
%shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
%val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
<4 x i32> zeroinitializer
%ret = shl <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v4i32 shift by the lowest useful constant.
; Every lane shifts left by 1; the expected VESLF carries that amount
; directly as its last operand.
define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f8:
; CHECK: veslf %v24, %v26, 1
; CHECK: br %r14
%ret = shl <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %ret
}
; Test a v4i32 shift by the highest useful constant.
; Every lane shifts left by 31, one less than the 32-bit element width; the
; expected VESLF carries that amount directly as its last operand.
define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: veslf %v24, %v26, 31
; CHECK: br %r14
%ret = shl <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %ret
}
; Test a v2i64 shift by a variable.
; The i32 amount is sign-extended to i64 and splatted across both lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESLG takes the amount from the address operand 0(%r2).
define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
; CHECK-LABEL: f10:
; CHECK: veslg %v24, %v26, 0(%r2)
; CHECK: br %r14
%extshift = sext i32 %shift to i64
%shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
%val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
<2 x i32> zeroinitializer
%ret = shl <2 x i64> %val1, %val2
ret <2 x i64> %ret
}
; Test a v2i64 shift by the lowest useful constant.
; Both lanes shift left by 1; the expected VESLG carries that amount
; directly as its last operand.
define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: veslg %v24, %v26, 1
; CHECK: br %r14
%ret = shl <2 x i64> %val, <i64 1, i64 1>
ret <2 x i64> %ret
}
; Test a v2i64 shift by the highest useful constant.
; Both lanes shift left by 63, one less than the 64-bit element width; the
; expected VESLG carries that amount directly as its last operand.
define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f12:
; CHECK: veslg %v24, %v26, 63
; CHECK: br %r14
%ret = shl <2 x i64> %val, <i64 63, i64 63>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,134 @@
; Test vector arithmetic shift right with scalar shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift by a variable.
; The i32 amount is truncated to i8 and splatted across all 16 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRAB takes the amount from the address operand 0(%r2).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
; CHECK-LABEL: f1:
; CHECK: vesrab %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i8
%shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
%val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
<16 x i32> zeroinitializer
%ret = ashr <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v16i8 shift by the lowest useful constant.
; Every lane is arithmetically shifted right by 1; the expected VESRAB
; carries that amount directly as its last operand.
define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vesrab %v24, %v26, 1
; CHECK: br %r14
%ret = ashr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %ret
}
; Test a v16i8 shift by the highest useful constant.
; Every lane is arithmetically shifted right by 7, one less than the 8-bit
; element width; the expected VESRAB carries that amount directly.
define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: vesrab %v24, %v26, 7
; CHECK: br %r14
%ret = ashr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %ret
}
; Test a v8i16 shift by a variable.
; The i32 amount is truncated to i16 and splatted across all 8 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRAH takes the amount from the address operand 0(%r2).
define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
; CHECK-LABEL: f4:
; CHECK: vesrah %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i16
%shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
%val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
<8 x i32> zeroinitializer
%ret = ashr <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v8i16 shift by the lowest useful constant.
; Every lane is arithmetically shifted right by 1; the expected VESRAH
; carries that amount directly as its last operand.
define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: vesrah %v24, %v26, 1
; CHECK: br %r14
%ret = ashr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %ret
}
; Test a v8i16 shift by the highest useful constant.
; Every lane is arithmetically shifted right by 15, one less than the
; 16-bit element width; the expected VESRAH carries that amount directly.
define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: vesrah %v24, %v26, 15
; CHECK: br %r14
%ret = ashr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %ret
}
; Test a v4i32 shift by a variable.
; The i32 amount is splatted across all 4 lanes without conversion
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRAF takes the amount from the address operand 0(%r2).
define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
; CHECK-LABEL: f7:
; CHECK: vesraf %v24, %v26, 0(%r2)
; CHECK: br %r14
%shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
%val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
<4 x i32> zeroinitializer
%ret = ashr <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v4i32 shift by the lowest useful constant.
; Every lane is arithmetically shifted right by 1; the expected VESRAF
; carries that amount directly as its last operand.
define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f8:
; CHECK: vesraf %v24, %v26, 1
; CHECK: br %r14
%ret = ashr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %ret
}
; Test a v4i32 shift by the highest useful constant.
; Every lane is arithmetically shifted right by 31, one less than the
; 32-bit element width; the expected VESRAF carries that amount directly.
define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: vesraf %v24, %v26, 31
; CHECK: br %r14
%ret = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %ret
}
; Test a v2i64 shift by a variable.
; The i32 amount is sign-extended to i64 and splatted across both lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRAG takes the amount from the address operand 0(%r2).
define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
; CHECK-LABEL: f10:
; CHECK: vesrag %v24, %v26, 0(%r2)
; CHECK: br %r14
%extshift = sext i32 %shift to i64
%shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
%val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
<2 x i32> zeroinitializer
%ret = ashr <2 x i64> %val1, %val2
ret <2 x i64> %ret
}
; Test a v2i64 shift by the lowest useful constant.
; Both lanes are arithmetically shifted right by 1; the expected VESRAG
; carries that amount directly as its last operand.
define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: vesrag %v24, %v26, 1
; CHECK: br %r14
%ret = ashr <2 x i64> %val, <i64 1, i64 1>
ret <2 x i64> %ret
}
; Test a v2i64 shift by the highest useful constant.
; Both lanes are arithmetically shifted right by 63, one less than the
; 64-bit element width; the expected VESRAG carries that amount directly.
define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f12:
; CHECK: vesrag %v24, %v26, 63
; CHECK: br %r14
%ret = ashr <2 x i64> %val, <i64 63, i64 63>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,134 @@
; Test vector logical shift right with scalar shift amount.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 shift by a variable.
; The i32 amount is truncated to i8 and splatted across all 16 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRLB takes the amount from the address operand 0(%r2).
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
; CHECK-LABEL: f1:
; CHECK: vesrlb %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i8
%shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
%val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
<16 x i32> zeroinitializer
%ret = lshr <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v16i8 shift by the lowest useful constant.
; Every lane is logically shifted right by 1; the expected VESRLB carries
; that amount directly as its last operand.
define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f2:
; CHECK: vesrlb %v24, %v26, 1
; CHECK: br %r14
%ret = lshr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %ret
}
; Test a v16i8 shift by the highest useful constant.
; Every lane is logically shifted right by 7, one less than the 8-bit
; element width; the expected VESRLB carries that amount directly.
define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
; CHECK-LABEL: f3:
; CHECK: vesrlb %v24, %v26, 7
; CHECK: br %r14
%ret = lshr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %ret
}
; Test a v8i16 shift by a variable.
; The i32 amount is truncated to i16 and splatted across all 8 lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRLH takes the amount from the address operand 0(%r2).
define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
; CHECK-LABEL: f4:
; CHECK: vesrlh %v24, %v26, 0(%r2)
; CHECK: br %r14
%truncshift = trunc i32 %shift to i16
%shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
%val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
<8 x i32> zeroinitializer
%ret = lshr <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v8i16 shift by the lowest useful constant.
; Every lane is logically shifted right by 1; the expected VESRLH carries
; that amount directly as its last operand.
define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f5:
; CHECK: vesrlh %v24, %v26, 1
; CHECK: br %r14
%ret = lshr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %ret
}
; Test a v8i16 shift by the highest useful constant.
; Every lane is logically shifted right by 15, one less than the 16-bit
; element width; the expected VESRLH carries that amount directly.
define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
; CHECK-LABEL: f6:
; CHECK: vesrlh %v24, %v26, 15
; CHECK: br %r14
%ret = lshr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %ret
}
; Test a v4i32 shift by a variable.
; The i32 amount is splatted across all 4 lanes without conversion
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRLF takes the amount from the address operand 0(%r2).
define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
; CHECK-LABEL: f7:
; CHECK: vesrlf %v24, %v26, 0(%r2)
; CHECK: br %r14
%shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
%val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
<4 x i32> zeroinitializer
%ret = lshr <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v4i32 shift by the lowest useful constant.
; Every lane is logically shifted right by 1; the expected VESRLF carries
; that amount directly as its last operand.
define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f8:
; CHECK: vesrlf %v24, %v26, 1
; CHECK: br %r14
%ret = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %ret
}
; Test a v4i32 shift by the highest useful constant.
; Every lane is logically shifted right by 31, one less than the 32-bit
; element width; the expected VESRLF carries that amount directly.
define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
; CHECK-LABEL: f9:
; CHECK: vesrlf %v24, %v26, 31
; CHECK: br %r14
%ret = lshr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %ret
}
; Test a v2i64 shift by a variable.
; The i32 amount is sign-extended to i64 and splatted across both lanes
; (insertelement into lane 0, then a zeroinitializer shuffle mask).  The
; expected VESRLG takes the amount from the address operand 0(%r2).
define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
; CHECK-LABEL: f10:
; CHECK: vesrlg %v24, %v26, 0(%r2)
; CHECK: br %r14
%extshift = sext i32 %shift to i64
%shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
%val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
<2 x i32> zeroinitializer
%ret = lshr <2 x i64> %val1, %val2
ret <2 x i64> %ret
}
; Test a v2i64 shift by the lowest useful constant.
; Both lanes are logically shifted right by 1; the expected VESRLG carries
; that amount directly as its last operand.
define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: vesrlg %v24, %v26, 1
; CHECK: br %r14
%ret = lshr <2 x i64> %val, <i64 1, i64 1>
ret <2 x i64> %ret
}
; Test a v2i64 shift by the highest useful constant.
; Both lanes are logically shifted right by 63, one less than the 64-bit
; element width; the expected VESRLG carries that amount directly.
define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
; CHECK-LABEL: f12:
; CHECK: vesrlg %v24, %v26, 63
; CHECK: br %r14
%ret = lshr <2 x i64> %val, <i64 63, i64 63>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,182 @@
; Test vector sign extensions.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i1->v16i8 extension.
; Each i8 lane is truncated to i1 and sign-extended back to i8.  The
; expected code is a left shift by 7 into a scratch register followed by an
; arithmetic right shift by 7 of that register.
define <16 x i8> @f1(<16 x i8> %val) {
; CHECK-LABEL: f1:
; CHECK: veslb [[REG:%v[0-9]+]], %v24, 7
; CHECK: vesrab %v24, [[REG]], 7
; CHECK: br %r14
%trunc = trunc <16 x i8> %val to <16 x i1>
%ret = sext <16 x i1> %trunc to <16 x i8>
ret <16 x i8> %ret
}
; Test a v8i1->v8i16 extension.
; Each i16 lane is truncated to i1 and sign-extended back to i16.  The
; expected code is a left shift by 15 into a scratch register followed by
; an arithmetic right shift by 15 of that register.
define <8 x i16> @f2(<8 x i16> %val) {
; CHECK-LABEL: f2:
; CHECK: veslh [[REG:%v[0-9]+]], %v24, 15
; CHECK: vesrah %v24, [[REG]], 15
; CHECK: br %r14
%trunc = trunc <8 x i16> %val to <8 x i1>
%ret = sext <8 x i1> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; Test a v8i8->v8i16 extension.
; Each i16 lane is truncated to i8 and sign-extended back to i16.  The
; expected code is a left shift by 8 into a scratch register followed by an
; arithmetic right shift by 8 of that register.
define <8 x i16> @f3(<8 x i16> %val) {
; CHECK-LABEL: f3:
; CHECK: veslh [[REG:%v[0-9]+]], %v24, 8
; CHECK: vesrah %v24, [[REG]], 8
; CHECK: br %r14
%trunc = trunc <8 x i16> %val to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; Test a v4i1->v4i32 extension.
; Each i32 lane is truncated to i1 and sign-extended back to i32.  The
; expected code is a left shift by 31 into a scratch register followed by
; an arithmetic right shift by 31 of that register.
define <4 x i32> @f4(<4 x i32> %val) {
; CHECK-LABEL: f4:
; CHECK: veslf [[REG:%v[0-9]+]], %v24, 31
; CHECK: vesraf %v24, [[REG]], 31
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i1>
%ret = sext <4 x i1> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v4i8->v4i32 extension.
; Each i32 lane is truncated to i8 and sign-extended back to i32.  The
; expected code is a left shift by 24 into a scratch register followed by
; an arithmetic right shift by 24 of that register.
define <4 x i32> @f5(<4 x i32> %val) {
; CHECK-LABEL: f5:
; CHECK: veslf [[REG:%v[0-9]+]], %v24, 24
; CHECK: vesraf %v24, [[REG]], 24
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i8>
%ret = sext <4 x i8> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v4i16->v4i32 extension.
; Each i32 lane is truncated to i16 and sign-extended back to i32.  The
; expected code is a left shift by 16 into a scratch register followed by
; an arithmetic right shift by 16 of that register.
define <4 x i32> @f6(<4 x i32> %val) {
; CHECK-LABEL: f6:
; CHECK: veslf [[REG:%v[0-9]+]], %v24, 16
; CHECK: vesraf %v24, [[REG]], 16
; CHECK: br %r14
%trunc = trunc <4 x i32> %val to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; Test a v2i1->v2i64 extension.
; Each i64 lane is truncated to i1 and sign-extended back to i64.  The
; expected code is a left shift by 63 into a scratch register followed by
; an arithmetic right shift by 63 of that register.
define <2 x i64> @f7(<2 x i64> %val) {
; CHECK-LABEL: f7:
; CHECK: veslg [[REG:%v[0-9]+]], %v24, 63
; CHECK: vesrag %v24, [[REG]], 63
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i1>
%ret = sext <2 x i1> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i8->v2i64 extension.
; Each i64 lane is truncated to i8 and sign-extended back to i64.  The
; expected code is a single VSEGB operating in place on %v24.
define <2 x i64> @f8(<2 x i64> %val) {
; CHECK-LABEL: f8:
; CHECK: vsegb %v24, %v24
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i8>
%ret = sext <2 x i8> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i16->v2i64 extension.
; Each i64 lane is truncated to i16 and sign-extended back to i64.  The
; expected code is a single VSEGH operating in place on %v24.
define <2 x i64> @f9(<2 x i64> %val) {
; CHECK-LABEL: f9:
; CHECK: vsegh %v24, %v24
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i16>
%ret = sext <2 x i16> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test a v2i32->v2i64 extension.
; Each i64 lane is truncated to i32 and sign-extended back to i64.  The
; expected code is a single VSEGF operating in place on %v24.
define <2 x i64> @f10(<2 x i64> %val) {
; CHECK-LABEL: f10:
; CHECK: vsegf %v24, %v24
; CHECK: br %r14
%trunc = trunc <2 x i64> %val to <2 x i32>
%ret = sext <2 x i32> %trunc to <2 x i64>
ret <2 x i64> %ret
}
; Test an alternative v2i8->v2i64 extension.
; The extension is written as an explicit shift pair: shl by 56 followed by
; ashr by 56 on both lanes.  The expected code is a single VSEGB.
define <2 x i64> @f11(<2 x i64> %val) {
; CHECK-LABEL: f11:
; CHECK: vsegb %v24, %v24
; CHECK: br %r14
%shl = shl <2 x i64> %val, <i64 56, i64 56>
%ret = ashr <2 x i64> %shl, <i64 56, i64 56>
ret <2 x i64> %ret
}
; Test an alternative v2i16->v2i64 extension.
; The extension is written as an explicit shift pair: shl by 48 followed by
; ashr by 48 on both lanes.  The expected code is a single VSEGH.
define <2 x i64> @f12(<2 x i64> %val) {
; CHECK-LABEL: f12:
; CHECK: vsegh %v24, %v24
; CHECK: br %r14
%shl = shl <2 x i64> %val, <i64 48, i64 48>
%ret = ashr <2 x i64> %shl, <i64 48, i64 48>
ret <2 x i64> %ret
}
; Test an alternative v2i32->v2i64 extension.
; The extension is written as an explicit shift pair: shl by 32 followed by
; ashr by 32 on both lanes.  The expected code is a single VSEGF.
define <2 x i64> @f13(<2 x i64> %val) {
; CHECK-LABEL: f13:
; CHECK: vsegf %v24, %v24
; CHECK: br %r14
%shl = shl <2 x i64> %val, <i64 32, i64 32>
%ret = ashr <2 x i64> %shl, <i64 32, i64 32>
ret <2 x i64> %ret
}
; Test an extraction-based v2i8->v2i64 extension.
; Byte elements 7 and 15 of %val (the last byte of each 8-byte half) are
; extracted, sign-extended to i64 individually and inserted into lanes 0
; and 1 of the result.  The expected code is a single VSEGB.
define <2 x i64> @f14(<16 x i8> %val) {
; CHECK-LABEL: f14:
; CHECK: vsegb %v24, %v24
; CHECK: br %r14
%elt0 = extractelement <16 x i8> %val, i32 7
%elt1 = extractelement <16 x i8> %val, i32 15
%ext0 = sext i8 %elt0 to i64
%ext1 = sext i8 %elt1 to i64
%vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
%vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
ret <2 x i64> %vec1
}
; Test an extraction-based v2i16->v2i64 extension.
; Halfword elements 3 and 7 of %val are extracted, sign-extended to i64
; individually and inserted into lanes 0 and 1 of the result.  The expected
; code is a single VSEGH.
; NOTE(review): %val is declared <16 x i16>, which is wider than one 128-bit
; vector register; only elements 3 and 7 are read.  Confirm whether
; <8 x i16> was intended.
define <2 x i64> @f15(<16 x i16> %val) {
; CHECK-LABEL: f15:
; CHECK: vsegh %v24, %v24
; CHECK: br %r14
%elt0 = extractelement <16 x i16> %val, i32 3
%elt1 = extractelement <16 x i16> %val, i32 7
%ext0 = sext i16 %elt0 to i64
%ext1 = sext i16 %elt1 to i64
%vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
%vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
ret <2 x i64> %vec1
}
; Test an extraction-based v2i32->v2i64 extension.
; Word elements 1 and 3 of %val are extracted, sign-extended to i64
; individually and inserted into lanes 0 and 1 of the result.  The expected
; code is a single VSEGF.
; NOTE(review): %val is declared <16 x i32>, which is wider than one 128-bit
; vector register; only elements 1 and 3 are read.  Confirm whether
; <4 x i32> was intended.
define <2 x i64> @f16(<16 x i32> %val) {
; CHECK-LABEL: f16:
; CHECK: vsegf %v24, %v24
; CHECK: br %r14
%elt0 = extractelement <16 x i32> %val, i32 1
%elt1 = extractelement <16 x i32> %val, i32 3
%ext0 = sext i32 %elt0 to i64
%ext1 = sext i32 %elt1 to i64
%vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
%vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
ret <2 x i64> %vec1
}

View File

@ -0,0 +1,39 @@
; Test vector subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 subtraction.
; Computes %val1 - %val2 lane by lane.  %dummy is unused; with it as the
; first parameter, the expected VSB reads %v26 and %v28 and writes %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vsb %v24, %v26, %v28
; CHECK: br %r14
%ret = sub <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 subtraction.
; Computes %val1 - %val2 lane by lane.  %dummy is unused; with it as the
; first parameter, the expected VSH reads %v26 and %v28 and writes %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vsh %v24, %v26, %v28
; CHECK: br %r14
%ret = sub <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 subtraction.
; Computes %val1 - %val2 lane by lane.  %dummy is unused; with it as the
; first parameter, the expected VSF reads %v26 and %v28 and writes %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vsf %v24, %v26, %v28
; CHECK: br %r14
%ret = sub <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 subtraction.
; Computes %val1 - %val2 lane by lane.  %dummy is unused; with it as the
; first parameter, the expected VSG reads %v26 and %v28 and writes %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vsg %v24, %v26, %v28
; CHECK: br %r14
%ret = sub <2 x i64> %val1, %val2
ret <2 x i64> %ret
}

View File

@ -0,0 +1,39 @@
; Test vector XOR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test a v16i8 XOR.
; Computes %val1 xor %val2.  The expected instruction is plain VX with no
; element-size suffix.  %dummy is unused; with it as the first parameter,
; VX reads %v26 and %v28 and writes %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vx %v24, %v26, %v28
; CHECK: br %r14
%ret = xor <16 x i8> %val1, %val2
ret <16 x i8> %ret
}
; Test a v8i16 XOR.
; Computes %val1 xor %val2.  The expected instruction is plain VX with no
; element-size suffix.  %dummy is unused; with it as the first parameter,
; VX reads %v26 and %v28 and writes %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vx %v24, %v26, %v28
; CHECK: br %r14
%ret = xor <8 x i16> %val1, %val2
ret <8 x i16> %ret
}
; Test a v4i32 XOR.
; Computes %val1 xor %val2.  The expected instruction is plain VX with no
; element-size suffix.  %dummy is unused; with it as the first parameter,
; VX reads %v26 and %v28 and writes %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vx %v24, %v26, %v28
; CHECK: br %r14
%ret = xor <4 x i32> %val1, %val2
ret <4 x i32> %ret
}
; Test a v2i64 XOR.
; Computes %val1 xor %val2.  The expected instruction is plain VX with no
; element-size suffix.  %dummy is unused; with it as the first parameter,
; VX reads %v26 and %v28 and writes %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vx %v24, %v26, %v28
; CHECK: br %r14
%ret = xor <2 x i64> %val1, %val2
ret <2 x i64> %ret
}