diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td index 6883db3bf07..96e7ca52569 100644 --- a/include/llvm/IR/IntrinsicsSystemZ.td +++ b/include/llvm/IR/IntrinsicsSystemZ.td @@ -11,6 +11,185 @@ // //===----------------------------------------------------------------------===// +class SystemZUnaryConv + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[result], [arg], [IntrNoMem]>; + +class SystemZUnary + : SystemZUnaryConv; + +class SystemZUnaryConvCC + : Intrinsic<[result, llvm_i32_ty], [arg], [IntrNoMem]>; + +class SystemZUnaryCC + : SystemZUnaryConvCC; + +class SystemZBinaryConv + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[result], [arg, arg], [IntrNoMem]>; + +class SystemZBinary + : SystemZBinaryConv; + +class SystemZBinaryInt + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>; + +class SystemZBinaryConvCC + : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>; + +class SystemZBinaryConvIntCC + : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>; + +class SystemZBinaryCC + : SystemZBinaryConvCC; + +class SystemZTernaryConv + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[result], [arg, arg, result], [IntrNoMem]>; + +class SystemZTernary + : SystemZTernaryConv; + +class SystemZTernaryInt + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>; + +class SystemZTernaryIntCC + : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>; + +class SystemZQuaternaryInt + : GCCBuiltin<"__builtin_s390_" ## name>, + Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>; + +class SystemZQuaternaryIntCC + : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty], + [IntrNoMem]>; + +multiclass SystemZUnaryExtBHF { + def b : SystemZUnaryConv; + def h : SystemZUnaryConv; + def f : SystemZUnaryConv; +} + +multiclass SystemZUnaryExtBHWF { + def b : SystemZUnaryConv; + def hw 
: SystemZUnaryConv; + def f : SystemZUnaryConv; +} + +multiclass SystemZUnaryBHF { + def b : SystemZUnary; + def h : SystemZUnary; + def f : SystemZUnary; +} + +multiclass SystemZUnaryBHFG : SystemZUnaryBHF { + def g : SystemZUnary; +} + +multiclass SystemZUnaryCCBHF { + def bs : SystemZUnaryCC; + def hs : SystemZUnaryCC; + def fs : SystemZUnaryCC; +} + +multiclass SystemZBinaryTruncHFG { + def h : SystemZBinaryConv; + def f : SystemZBinaryConv; + def g : SystemZBinaryConv; +} + +multiclass SystemZBinaryTruncCCHFG { + def hs : SystemZBinaryConvCC; + def fs : SystemZBinaryConvCC; + def gs : SystemZBinaryConvCC; +} + +multiclass SystemZBinaryExtBHF { + def b : SystemZBinaryConv; + def h : SystemZBinaryConv; + def f : SystemZBinaryConv; +} + +multiclass SystemZBinaryExtBHFG : SystemZBinaryExtBHF { + def g : SystemZBinaryConv; +} + +multiclass SystemZBinaryBHF { + def b : SystemZBinary; + def h : SystemZBinary; + def f : SystemZBinary; +} + +multiclass SystemZBinaryBHFG : SystemZBinaryBHF { + def g : SystemZBinary; +} + +multiclass SystemZBinaryIntBHFG { + def b : SystemZBinaryInt; + def h : SystemZBinaryInt; + def f : SystemZBinaryInt; + def g : SystemZBinaryInt; +} + +multiclass SystemZBinaryCCBHF { + def bs : SystemZBinaryCC; + def hs : SystemZBinaryCC; + def fs : SystemZBinaryCC; +} + +multiclass SystemZCompareBHFG { + def bs : SystemZBinaryCC; + def hs : SystemZBinaryCC; + def fs : SystemZBinaryCC; + def gs : SystemZBinaryCC; +} + +multiclass SystemZTernaryExtBHF { + def b : SystemZTernaryConv; + def h : SystemZTernaryConv; + def f : SystemZTernaryConv; +} + +multiclass SystemZTernaryExtBHFG : SystemZTernaryExtBHF { + def g : SystemZTernaryConv; +} + +multiclass SystemZTernaryBHF { + def b : SystemZTernary; + def h : SystemZTernary; + def f : SystemZTernary; +} + +multiclass SystemZTernaryIntBHF { + def b : SystemZTernaryInt; + def h : SystemZTernaryInt; + def f : SystemZTernaryInt; +} + +multiclass SystemZTernaryIntCCBHF { + def bs : SystemZTernaryIntCC; + def hs 
: SystemZTernaryIntCC; + def fs : SystemZTernaryIntCC; +} + +multiclass SystemZQuaternaryIntBHF { + def b : SystemZQuaternaryInt; + def h : SystemZQuaternaryInt; + def f : SystemZQuaternaryInt; +} + +multiclass SystemZQuaternaryIntBHFG : SystemZQuaternaryIntBHF { + def g : SystemZQuaternaryInt; +} + +multiclass SystemZQuaternaryIntCCBHF { + def bs : SystemZQuaternaryIntCC; + def hs : SystemZQuaternaryIntCC; + def fs : SystemZQuaternaryIntCC; +} + //===----------------------------------------------------------------------===// // // Transactional-execution intrinsics @@ -44,3 +223,154 @@ let TargetPrefix = "s390" in { Intrinsic<[], [llvm_i32_ty]>; } +//===----------------------------------------------------------------------===// +// +// Vector intrinsics +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "s390" in { + def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + + def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">, + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], + [IntrReadArgMem]>; + + def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + + defm int_s390_vpks : SystemZBinaryTruncHFG<"vpks">; + defm int_s390_vpks : SystemZBinaryTruncCCHFG; + + defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">; + defm int_s390_vpkls : SystemZBinaryTruncCCHFG; + + def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, + Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], + // In fact write-only but there's no property + // for that. 
+ [IntrReadWriteArgMem]>; + + defm int_s390_vupl : SystemZUnaryExtBHWF<"vupl">; + defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">; + + defm int_s390_vuph : SystemZUnaryExtBHF<"vuph">; + defm int_s390_vuplh : SystemZUnaryExtBHF<"vuplh">; + + defm int_s390_vacc : SystemZBinaryBHFG<"vacc">; + + def int_s390_vaq : SystemZBinary<"vaq", llvm_v16i8_ty>; + def int_s390_vacq : SystemZTernary<"vacq", llvm_v16i8_ty>; + def int_s390_vaccq : SystemZBinary<"vaccq", llvm_v16i8_ty>; + def int_s390_vacccq : SystemZTernary<"vacccq", llvm_v16i8_ty>; + + defm int_s390_vavg : SystemZBinaryBHFG<"vavg">; + defm int_s390_vavgl : SystemZBinaryBHFG<"vavgl">; + + def int_s390_vcksm : SystemZBinary<"vcksm", llvm_v4i32_ty>; + + defm int_s390_vgfm : SystemZBinaryExtBHFG<"vgfm">; + defm int_s390_vgfma : SystemZTernaryExtBHFG<"vgfma">; + + defm int_s390_vmah : SystemZTernaryBHF<"vmah">; + defm int_s390_vmalh : SystemZTernaryBHF<"vmalh">; + defm int_s390_vmae : SystemZTernaryExtBHF<"vmae">; + defm int_s390_vmale : SystemZTernaryExtBHF<"vmale">; + defm int_s390_vmao : SystemZTernaryExtBHF<"vmao">; + defm int_s390_vmalo : SystemZTernaryExtBHF<"vmalo">; + + defm int_s390_vmh : SystemZBinaryBHF<"vmh">; + defm int_s390_vmlh : SystemZBinaryBHF<"vmlh">; + defm int_s390_vme : SystemZBinaryExtBHF<"vme">; + defm int_s390_vmle : SystemZBinaryExtBHF<"vmle">; + defm int_s390_vmo : SystemZBinaryExtBHF<"vmo">; + defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">; + + defm int_s390_verllv : SystemZBinaryBHFG<"verllv">; + defm int_s390_verll : SystemZBinaryIntBHFG<"verll">; + defm int_s390_verim : SystemZQuaternaryIntBHFG<"verim">; + + def int_s390_vsl : SystemZBinary<"vsl", llvm_v16i8_ty>; + def int_s390_vslb : SystemZBinary<"vslb", llvm_v16i8_ty>; + def int_s390_vsra : SystemZBinary<"vsra", llvm_v16i8_ty>; + def int_s390_vsrab : SystemZBinary<"vsrab", llvm_v16i8_ty>; + def int_s390_vsrl : SystemZBinary<"vsrl", llvm_v16i8_ty>; + def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>; + + def int_s390_vsldb : 
GCCBuiltin<"__builtin_s390_vsldb">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">; + + def int_s390_vsq : SystemZBinary<"vsq", llvm_v16i8_ty>; + def int_s390_vsbiq : SystemZTernary<"vsbiq", llvm_v16i8_ty>; + def int_s390_vscbiq : SystemZBinary<"vscbiq", llvm_v16i8_ty>; + def int_s390_vsbcbiq : SystemZTernary<"vsbcbiq", llvm_v16i8_ty>; + + def int_s390_vsumb : SystemZBinaryConv<"vsumb", llvm_v4i32_ty, llvm_v16i8_ty>; + def int_s390_vsumh : SystemZBinaryConv<"vsumh", llvm_v4i32_ty, llvm_v8i16_ty>; + + def int_s390_vsumgh : SystemZBinaryConv<"vsumgh", llvm_v2i64_ty, + llvm_v8i16_ty>; + def int_s390_vsumgf : SystemZBinaryConv<"vsumgf", llvm_v2i64_ty, + llvm_v4i32_ty>; + + def int_s390_vsumqf : SystemZBinaryConv<"vsumqf", llvm_v16i8_ty, + llvm_v4i32_ty>; + def int_s390_vsumqg : SystemZBinaryConv<"vsumqg", llvm_v16i8_ty, + llvm_v2i64_ty>; + + def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>; + + defm int_s390_vceq : SystemZCompareBHFG<"vceq">; + defm int_s390_vch : SystemZCompareBHFG<"vch">; + defm int_s390_vchl : SystemZCompareBHFG<"vchl">; + + defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">; + defm int_s390_vfae : SystemZTernaryIntCCBHF; + defm int_s390_vfaez : SystemZTernaryIntBHF<"vfaez">; + defm int_s390_vfaez : SystemZTernaryIntCCBHF; + + defm int_s390_vfee : SystemZBinaryBHF<"vfee">; + defm int_s390_vfee : SystemZBinaryCCBHF; + defm int_s390_vfeez : SystemZBinaryBHF<"vfeez">; + defm int_s390_vfeez : SystemZBinaryCCBHF; + + defm int_s390_vfene : SystemZBinaryBHF<"vfene">; + defm int_s390_vfene : SystemZBinaryCCBHF; + defm int_s390_vfenez : SystemZBinaryBHF<"vfenez">; + defm int_s390_vfenez : SystemZBinaryCCBHF; + + defm int_s390_vistr : SystemZUnaryBHF<"vistr">; + defm int_s390_vistr : SystemZUnaryCCBHF; + + defm int_s390_vstrc : SystemZQuaternaryIntBHF<"vstrc">; + defm int_s390_vstrc : SystemZQuaternaryIntCCBHF; + defm int_s390_vstrcz : 
SystemZQuaternaryIntBHF<"vstrcz">; + defm int_s390_vstrcz : SystemZQuaternaryIntCCBHF; + + def int_s390_vfcedbs : SystemZBinaryConvCC; + def int_s390_vfchdbs : SystemZBinaryConvCC; + def int_s390_vfchedbs : SystemZBinaryConvCC; + + def int_s390_vftcidb : SystemZBinaryConvIntCC; + + def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +} diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 6834818fc37..cafe2c5948c 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -80,6 +80,13 @@ const unsigned CCMASK_TEND_TX = CCMASK_0; const unsigned CCMASK_TEND_NOTX = CCMASK_2; const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; +// Condition-code mask assignments for vector comparisons (and similar +// operations). +const unsigned CCMASK_VCMP_ALL = CCMASK_0; +const unsigned CCMASK_VCMP_MIXED = CCMASK_1; +const unsigned CCMASK_VCMP_NONE = CCMASK_3; +const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 24383109d57..367e89ee19e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -440,6 +440,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; @@ -1253,6 +1254,143 @@ static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, } } +// Return true if Op is an intrinsic node without chain that returns the +// CC value as its final argument. Provide the associated SystemZISD +// opcode and the mask of valid CC values if so. 
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { + unsigned Id = cast(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpkshs: + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + Opcode = SystemZISD::PACKS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vpklshs: + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + Opcode = SystemZISD::PACKLS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vceqbs: + case Intrinsic::s390_vceqhs: + case Intrinsic::s390_vceqfs: + case Intrinsic::s390_vceqgs: + Opcode = SystemZISD::VICMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchbs: + case Intrinsic::s390_vchhs: + case Intrinsic::s390_vchfs: + case Intrinsic::s390_vchgs: + Opcode = SystemZISD::VICMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchlbs: + case Intrinsic::s390_vchlhs: + case Intrinsic::s390_vchlfs: + case Intrinsic::s390_vchlgs: + Opcode = SystemZISD::VICMPHLS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vtm: + Opcode = SystemZISD::VTM; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfaebs: + case Intrinsic::s390_vfaehs: + case Intrinsic::s390_vfaefs: + Opcode = SystemZISD::VFAE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfaezbs: + case Intrinsic::s390_vfaezhs: + case Intrinsic::s390_vfaezfs: + Opcode = SystemZISD::VFAEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeebs: + case Intrinsic::s390_vfeehs: + case Intrinsic::s390_vfeefs: + Opcode = SystemZISD::VFEE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeezbs: + case Intrinsic::s390_vfeezhs: + case Intrinsic::s390_vfeezfs: + Opcode = SystemZISD::VFEEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenebs: + case 
Intrinsic::s390_vfenehs: + case Intrinsic::s390_vfenefs: + Opcode = SystemZISD::VFENE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenezbs: + case Intrinsic::s390_vfenezhs: + case Intrinsic::s390_vfenezfs: + Opcode = SystemZISD::VFENEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vistrbs: + case Intrinsic::s390_vistrhs: + case Intrinsic::s390_vistrfs: + Opcode = SystemZISD::VISTR_CC; + CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; + return true; + + case Intrinsic::s390_vstrcbs: + case Intrinsic::s390_vstrchs: + case Intrinsic::s390_vstrcfs: + Opcode = SystemZISD::VSTRC_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrczbs: + case Intrinsic::s390_vstrczhs: + case Intrinsic::s390_vstrczfs: + Opcode = SystemZISD::VSTRCZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfcedbs: + Opcode = SystemZISD::VFCMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchdbs: + Opcode = SystemZISD::VFCMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchedbs: + Opcode = SystemZISD::VFCMPHES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vftcidb: + Opcode = SystemZISD::VFTCI; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + default: + return false; + } +} + // Emit an intrinsic with chain with a glued value instead of its CC result. static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { @@ -1273,6 +1411,23 @@ static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, return Intr; } +// Emit an intrinsic with a glued value instead of its CC result. +static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. 
+ unsigned NumOps = Op.getNumOperands(); + SmallVector Ops; + Ops.reserve(NumOps - 1); + for (unsigned I = 1; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + if (Op->getNumValues() == 1) + return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); + assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); + SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); + return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1876,6 +2031,10 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && + isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); @@ -1924,6 +2083,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { case ISD::INTRINSIC_W_CHAIN: Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); break; + case ISD::INTRINSIC_WO_CHAIN: + Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); + break; default: llvm_unreachable("Invalid comparison operands"); } @@ -3058,6 +3220,67 @@ SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return SDValue(); } +SDValue +SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCC(Op, Opcode, CCValid)) { + SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + if (Op->getNumValues() == 1) + return CC; + 
assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + Glued, CC); + } + + unsigned Id = cast(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpdi: + return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vperm: + return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vuphb: + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplhb: + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplb: + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vupllb: + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vsumb: + case Intrinsic::s390_vsumh: + case Intrinsic::s390_vsumgh: + case Intrinsic::s390_vsumgf: + case Intrinsic::s390_vsumqf: + case Intrinsic::s390_vsumqg: + return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + + return SDValue(); +} + namespace { // Says that SystemZISD operation Opcode can be used to perform the equivalent // of a VPERM with permute vector Bytes. 
If Opcode takes three operands, @@ -4117,6 +4340,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerPREFETCH(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: @@ -4195,6 +4420,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(PERMUTE_DWORDS); OPCODE(PERMUTE); OPCODE(PACK); + OPCODE(PACKS_CC); + OPCODE(PACKLS_CC); OPCODE(UNPACK_HIGH); OPCODE(UNPACKL_HIGH); OPCODE(UNPACK_LOW); @@ -4206,11 +4433,28 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VICMPE); OPCODE(VICMPH); OPCODE(VICMPHL); + OPCODE(VICMPES); + OPCODE(VICMPHS); + OPCODE(VICMPHLS); OPCODE(VFCMPE); OPCODE(VFCMPH); OPCODE(VFCMPHE); + OPCODE(VFCMPES); + OPCODE(VFCMPHS); + OPCODE(VFCMPHES); + OPCODE(VFTCI); OPCODE(VEXTEND); OPCODE(VROUND); + OPCODE(VTM); + OPCODE(VFAE_CC); + OPCODE(VFAEZ_CC); + OPCODE(VFEE_CC); + OPCODE(VFEEZ_CC); + OPCODE(VFENE_CC); + OPCODE(VFENEZ_CC); + OPCODE(VISTR_CC); + OPCODE(VSTRC_CC); + OPCODE(VSTRCZ_CC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 7a3b6fa85ae..4a55e632100 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -201,6 +201,11 @@ enum { // Pack vector operands 0 and 1 into a single vector with half-sized elements. PACK, + // Likewise, but saturate the result and set CC. PACKS_CC does signed + // saturation and PACKLS_CC does unsigned saturation. + PACKS_CC, + PACKLS_CC, + // Unpack the first half of vector operand 0 into double-sized elements. // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. 
UNPACK_HIGH, @@ -228,6 +233,11 @@ enum { VICMPH, VICMPHL, + // Likewise, but also set the condition codes on the result. + VICMPES, + VICMPHS, + VICMPHLS, + // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1 // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and // greater than" and VFCMPHE for "ordered and greater than or equal to". @@ -235,6 +245,14 @@ enum { VFCMPH, VFCMPHE, + // Likewise, but also set the condition codes on the result. + VFCMPES, + VFCMPHS, + VFCMPHES, + + // Test floating-point data class for vectors. + VFTCI, + // Extend the even f32 elements of vector operand 0 to produce a vector // of f64 elements. VEXTEND, @@ -243,6 +261,20 @@ enum { // even elements of the result. VROUND, + // AND the two vector operands together and set CC based on the result. + VTM, + + // String operations that set CC as a side-effect. + VFAE_CC, + VFAEZ_CC, + VFEE_CC, + VFEEZ_CC, + VFENE_CC, + VFENEZ_CC, + VISTR_CC, + VSTRC_CC, + VSTRCZ_CC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. // @@ -438,6 +470,7 @@ private: SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index f95714d1e70..c101e43ada3 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -100,17 +100,20 @@ let Predicates = [FeatureVector] in { def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; // Load to block boundary. 
The number of loaded bytes is only known - // at run time. - def VLBB : BinaryVRX<"vlbb", 0xE707, null_frag, v128any, 0>; + // at run time. The instruction is really polymorphic, but v128b matches + // the return type of the associated intrinsic. + def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; // Load count to block boundary. let Defs = [CC] in def LCBB : InstRXE<0xE727, (outs GR32:$R1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3), - "lcbb\t$R1, $XBD2, $M3", []>; + "lcbb\t$R1, $XBD2, $M3", + [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, + imm32zx4:$M3))]>; // Load with length. The number of loaded bytes is only known at run time. - def VLL : BinaryVRSb<"vll", 0xE737, null_frag, 0>; + def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; // Load multiple. def VLM : LoadMultipleVRSa<"vlm", 0xE736>; @@ -185,7 +188,7 @@ let Predicates = [FeatureVector] in { def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; // Store with length. The number of stored bytes is only known at run time. - def VSTL : StoreLengthVRSb<"vstl", 0xE73F, null_frag, 0>; + def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; // Store multiple. def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; @@ -266,19 +269,19 @@ let Predicates = [FeatureVector] in { def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; // Pack saturate. - defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, null_frag, null_frag, + defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, v128b, v128h, 1>; - defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, null_frag, null_frag, + defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, v128h, v128f, 2>; - defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, null_frag, null_frag, + defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, v128f, v128g, 3>; // Pack saturate logical. 
- defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, null_frag, null_frag, + defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, v128b, v128h, 1>; - defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, null_frag, null_frag, + defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, v128h, v128f, 2>; - defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, null_frag, null_frag, + defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, v128f, v128g, 3>; // Sign-extend to doubleword. @@ -344,20 +347,20 @@ let Predicates = [FeatureVector] in { def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; - def VAQ : BinaryVRRc<"vaq", 0xE7F3, null_frag, v128q, v128q, 4>; + def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; // Add compute carry. - def VACCB : BinaryVRRc<"vaccb", 0xE7F1, null_frag, v128b, v128b, 0>; - def VACCH : BinaryVRRc<"vacch", 0xE7F1, null_frag, v128h, v128h, 1>; - def VACCF : BinaryVRRc<"vaccf", 0xE7F1, null_frag, v128f, v128f, 2>; - def VACCG : BinaryVRRc<"vaccg", 0xE7F1, null_frag, v128g, v128g, 3>; - def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, null_frag, v128q, v128q, 4>; + def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; + def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; + def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; + def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; + def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; // Add with carry. - def VACQ : TernaryVRRd<"vacq", 0xE7BB, null_frag, v128q, v128q, 4>; + def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; // Add with carry compute carry. 
- def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, null_frag, v128q, v128q, 4>; + def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; // And. def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; @@ -366,19 +369,19 @@ let Predicates = [FeatureVector] in { def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; // Average. - def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, null_frag, v128b, v128b, 0>; - def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, null_frag, v128h, v128h, 1>; - def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, null_frag, v128f, v128f, 2>; - def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, null_frag, v128g, v128g, 3>; + def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; + def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; + def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; + def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; // Average logical. - def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, null_frag, v128b, v128b, 0>; - def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, null_frag, v128h, v128h, 1>; - def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, null_frag, v128f, v128f, 2>; - def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, null_frag, v128g, v128g, 3>; + def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; + def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; + def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; + def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; // Checksum. - def VCKSM : BinaryVRRc<"vcksm", 0xE766, null_frag, v128any, v128any>; + def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; // Count leading zeros. def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; @@ -396,16 +399,16 @@ let Predicates = [FeatureVector] in { def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; // Galois field multiply sum. 
- def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, null_frag, v128b, v128b, 0>; - def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, null_frag, v128h, v128h, 1>; - def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, null_frag, v128f, v128f, 2>; - def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, null_frag, v128g, v128g, 3>; + def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; + def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; + def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; + def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; // Galois field multiply sum and accumulate. - def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, null_frag, v128b, v128b, 0>; - def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, null_frag, v128h, v128h, 1>; - def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, null_frag, v128f, v128f, 2>; - def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, null_frag, v128g, v128g, 3>; + def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; + def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; + def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; + def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; // Load complement. def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; @@ -449,44 +452,44 @@ let Predicates = [FeatureVector] in { def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; // Multiply and add high. - def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, null_frag, v128b, v128b, 0>; - def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, null_frag, v128h, v128h, 1>; - def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, null_frag, v128f, v128f, 2>; + def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; + def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; + def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; // Multiply and add logical high. 
- def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, null_frag, v128b, v128b, 0>; - def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, null_frag, v128h, v128h, 1>; - def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, null_frag, v128f, v128f, 2>; + def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; + def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; + def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; // Multiply and add even. - def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, null_frag, v128h, v128b, 0>; - def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, null_frag, v128f, v128h, 1>; - def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, null_frag, v128g, v128f, 2>; + def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; + def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; + def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; // Multiply and add logical even. - def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, null_frag, v128h, v128b, 0>; - def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, null_frag, v128f, v128h, 1>; - def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, null_frag, v128g, v128f, 2>; + def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; + def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; + def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; // Multiply and add odd. - def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, null_frag, v128h, v128b, 0>; - def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, null_frag, v128f, v128h, 1>; - def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, null_frag, v128g, v128f, 2>; + def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; + def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; + def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; // Multiply and add logical odd. 
- def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, null_frag, v128h, v128b, 0>; - def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, null_frag, v128f, v128h, 1>; - def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, null_frag, v128g, v128f, 2>; + def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; + def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; + def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; // Multiply high. - def VMHB : BinaryVRRc<"vmhb", 0xE7A3, null_frag, v128b, v128b, 0>; - def VMHH : BinaryVRRc<"vmhh", 0xE7A3, null_frag, v128h, v128h, 1>; - def VMHF : BinaryVRRc<"vmhf", 0xE7A3, null_frag, v128f, v128f, 2>; + def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; + def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; + def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; // Multiply logical high. - def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, null_frag, v128b, v128b, 0>; - def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, null_frag, v128h, v128h, 1>; - def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, null_frag, v128f, v128f, 2>; + def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; + def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; + def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; // Multiply low. def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; @@ -494,24 +497,24 @@ let Predicates = [FeatureVector] in { def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; // Multiply even. 
- def VMEB : BinaryVRRc<"vmeb", 0xE7A6, null_frag, v128h, v128b, 0>; - def VMEH : BinaryVRRc<"vmeh", 0xE7A6, null_frag, v128f, v128h, 1>; - def VMEF : BinaryVRRc<"vmef", 0xE7A6, null_frag, v128g, v128f, 2>; + def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; + def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; + def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; // Multiply logical even. - def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, null_frag, v128h, v128b, 0>; - def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, null_frag, v128f, v128h, 1>; - def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, null_frag, v128g, v128f, 2>; + def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; + def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; + def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; // Multiply odd. - def VMOB : BinaryVRRc<"vmob", 0xE7A7, null_frag, v128h, v128b, 0>; - def VMOH : BinaryVRRc<"vmoh", 0xE7A7, null_frag, v128f, v128h, 1>; - def VMOF : BinaryVRRc<"vmof", 0xE7A7, null_frag, v128g, v128f, 2>; + def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; + def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; + def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; // Multiply logical odd. - def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, null_frag, v128h, v128b, 0>; - def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, null_frag, v128f, v128h, 1>; - def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, null_frag, v128g, v128f, 2>; + def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; + def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; + def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; // Nor. 
def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; @@ -524,22 +527,26 @@ let Predicates = [FeatureVector] in { def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; // Element rotate left logical (with vector shift amount). - def VERLLVB : BinaryVRRc<"verllvb", 0xE773, null_frag, v128b, v128b, 0>; - def VERLLVH : BinaryVRRc<"verllvh", 0xE773, null_frag, v128h, v128h, 1>; - def VERLLVF : BinaryVRRc<"verllvf", 0xE773, null_frag, v128f, v128f, 2>; - def VERLLVG : BinaryVRRc<"verllvg", 0xE773, null_frag, v128g, v128g, 3>; + def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb, + v128b, v128b, 0>; + def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh, + v128h, v128h, 1>; + def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf, + v128f, v128f, 2>; + def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg, + v128g, v128g, 3>; // Element rotate left logical (with scalar shift amount). - def VERLLB : BinaryVRSa<"verllb", 0xE733, null_frag, v128b, v128b, 0>; - def VERLLH : BinaryVRSa<"verllh", 0xE733, null_frag, v128h, v128h, 1>; - def VERLLF : BinaryVRSa<"verllf", 0xE733, null_frag, v128f, v128f, 2>; - def VERLLG : BinaryVRSa<"verllg", 0xE733, null_frag, v128g, v128g, 3>; + def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>; + def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>; + def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>; + def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>; // Element rotate and insert under mask. 
- def VERIMB : QuaternaryVRId<"verimb", 0xE772, null_frag, v128b, v128b, 0>; - def VERIMH : QuaternaryVRId<"verimh", 0xE772, null_frag, v128h, v128h, 1>; - def VERIMF : QuaternaryVRId<"verimf", 0xE772, null_frag, v128f, v128f, 2>; - def VERIMG : QuaternaryVRId<"verimg", 0xE772, null_frag, v128g, v128g, 3>; + def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; + def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; + def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; + def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; // Element shift left (with vector shift amount). def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; @@ -578,45 +585,48 @@ let Predicates = [FeatureVector] in { def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; // Shift left. - def VSL : BinaryVRRc<"vsl", 0xE774, null_frag, v128b, v128b>; + def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; // Shift left by byte. - def VSLB : BinaryVRRc<"vslb", 0xE775, null_frag, v128b, v128b>; + def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; // Shift left double by byte. def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; // Shift right arithmetic. - def VSRA : BinaryVRRc<"vsra", 0xE77E, null_frag, v128b, v128b>; + def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; // Shift right arithmetic by byte. - def VSRAB : BinaryVRRc<"vsrab", 0xE77F, null_frag, v128b, v128b>; + def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; // Shift right logical. - def VSRL : BinaryVRRc<"vsrl", 0xE77C, null_frag, v128b, v128b>; + def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; // Shift right logical by byte. 
- def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, null_frag, v128b, v128b>; + def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; // Subtract. def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; - def VSQ : BinaryVRRc<"vsq", 0xE7F7, null_frag, v128q, v128q, 4>; + def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>; // Subtract compute borrow indication. - def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, null_frag, v128b, v128b, 0>; - def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, null_frag, v128h, v128h, 1>; - def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, null_frag, v128f, v128f, 2>; - def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, null_frag, v128g, v128g, 3>; - def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, null_frag, v128q, v128q, 4>; + def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>; + def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>; + def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>; + def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>; + def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>; // Subtract with borrow indication. - def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, null_frag, v128q, v128q, 4>; + def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>; // Subtract with borrow compute borrow indication. - def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, null_frag, v128q, v128q, 4>; + def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq, + v128q, v128q, 4>; // Sum across doubleword. def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; @@ -742,38 +752,38 @@ let Predicates = [FeatureVector] in { } // Compare equal. 
- defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, null_frag, + defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, v128b, v128b, 0>; - defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, null_frag, + defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, v128h, v128h, 1>; - defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, null_frag, + defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, v128f, v128f, 2>; - defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, null_frag, + defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, v128g, v128g, 3>; // Compare high. - defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, null_frag, + defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, v128b, v128b, 0>; - defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, null_frag, + defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, v128h, v128h, 1>; - defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, null_frag, + defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, v128f, v128f, 2>; - defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, null_frag, + defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, v128g, v128g, 3>; // Compare high logical. - defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, null_frag, + defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, v128b, v128b, 0>; - defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, null_frag, + defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, v128h, v128h, 1>; - defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, null_frag, + defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, v128f, v128f, 2>; - defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, null_frag, + defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, v128g, v128g, 3>; // Test under mask. 
let Defs = [CC] in - def VTM : CompareVRRa<"vtm", 0xE7D8, null_frag, v128any, 0>; + def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; } //===----------------------------------------------------------------------===// @@ -823,7 +833,7 @@ let Predicates = [FeatureVector] in { def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; // Load FP integer. - def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>; + def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; defm : VectorRounding; defm : VectorRounding; @@ -872,7 +882,7 @@ let Predicates = [FeatureVector] in { // Test data class immediate. let Defs = [CC] in { - def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, null_frag, v128g, v128db, 3, 0>; + def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; } } @@ -891,19 +901,19 @@ let Predicates = [FeatureVector] in { def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; // Compare equal. - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, null_frag, + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; // Compare high. - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, null_frag, + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; // Compare high or equal. 
- defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, null_frag, + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; @@ -1026,62 +1036,62 @@ let AddedComplexity = 4 in { //===----------------------------------------------------------------------===// let Predicates = [FeatureVector] in { - defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, null_frag, null_frag, + defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc, v128b, v128b, 0, 0>; - defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, null_frag, null_frag, + defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc, v128h, v128h, 1, 0>; - defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, null_frag, null_frag, + defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc, v128f, v128f, 2, 0>; - defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, null_frag, null_frag, + defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc, v128b, v128b, 0, 2>; - defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, null_frag, null_frag, + defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc, v128h, v128h, 1, 2>; - defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, null_frag, null_frag, + defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc, v128f, v128f, 2, 2>; - defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, null_frag, null_frag, + defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc, v128b, v128b, 0, 0, 1>; - defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, null_frag, null_frag, + defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc, v128h, v128h, 1, 0, 1>; - defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, null_frag, null_frag, + defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc, v128f, v128f, 2, 0, 1>; - defm VFEEZB : 
BinaryVRRbSPair<"vfeezb", 0xE780, null_frag, null_frag, + defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc, v128b, v128b, 0, 2, 3>; - defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, null_frag, null_frag, + defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc, v128h, v128h, 1, 2, 3>; - defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, null_frag, null_frag, + defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc, v128f, v128f, 2, 2, 3>; - defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, null_frag, null_frag, + defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc, v128b, v128b, 0, 0, 1>; - defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, null_frag, null_frag, + defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc, v128h, v128h, 1, 0, 1>; - defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, null_frag, null_frag, + defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc, v128f, v128f, 2, 0, 1>; - defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, null_frag, null_frag, - v128b, v128b, 0, 2, 3>; - defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, null_frag, null_frag, - v128h, v128h, 1, 2, 3>; - defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, null_frag, null_frag, - v128f, v128f, 2, 2, 3>; + defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, + z_vfenez_cc, v128b, v128b, 0, 2, 3>; + defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, + z_vfenez_cc, v128h, v128h, 1, 2, 3>; + defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, + z_vfenez_cc, v128f, v128f, 2, 2, 3>; - defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, null_frag, null_frag, + defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc, v128b, v128b, 0>; - defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, null_frag, null_frag, + defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc, v128h, v128h, 1>; - 
defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, null_frag, null_frag, + defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc, v128f, v128f, 2>; - defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, null_frag, null_frag, - v128b, v128b, 0, 0>; - defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, null_frag, null_frag, - v128h, v128h, 1, 0>; - defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, null_frag, null_frag, - v128f, v128f, 2, 0>; - defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, null_frag, null_frag, - v128b, v128b, 0, 2>; - defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, null_frag, null_frag, - v128h, v128h, 1, 2>; - defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, null_frag, null_frag, - v128f, v128f, 2, 2>; + defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, + z_vstrc_cc, v128b, v128b, 0, 0>; + defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, + z_vstrc_cc, v128h, v128h, 1, 0>; + defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, + z_vstrc_cc, v128f, v128f, 2, 0>; + defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, + z_vstrcz_cc, v128b, v128b, 0, 2>; + defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, + z_vstrcz_cc, v128h, v128h, 1, 2>; + defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, + z_vstrcz_cc, v128f, v128f, 2, 2>; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 9bf288aa68e..3c95a1e11b4 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -94,6 +94,9 @@ def SDT_ZReplicate : SDTypeProfile<1, 1, def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def SDT_ZVecUnary : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>]>; def SDT_ZVecBinary : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -106,6 +109,10 @@ def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, 
[SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>; +def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisVT<2, i32>]>; def SDT_ZRotateMask : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>, @@ -124,6 +131,12 @@ def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; +def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -193,6 +206,10 @@ def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", SDT_ZVecTernaryInt>; def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; +def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; @@ -207,11 +224,44 @@ def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; +def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, + [SDNPOutGlue]>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", 
SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; +def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, + [SDNPOutGlue]>; +def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, + [SDNPOutGlue]>; +def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", + SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; +def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, + [SDNPOutGlue]>; class AtomicWOp : SDNode<"SystemZISD::"##name, profile, diff --git a/test/CodeGen/SystemZ/vec-intrinsics.ll b/test/CodeGen/SystemZ/vec-intrinsics.ll new file mode 100644 index 00000000000..55527787da4 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-intrinsics.ll @@ -0,0 +1,3335 @@ +; Test vector intrinsics. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare i32 @llvm.s390.lcbb(i8 *, i32) +declare <16 x i8> @llvm.s390.vlbb(i8 *, i32) +declare <16 x i8> @llvm.s390.vll(i32, i8 *) +declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) +declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) +declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) +declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) +declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *) +declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) +declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) +declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) +declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>) +declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>) +declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>) +declare <8 x i16> @llvm.s390.vuplb(<16 x i8>) +declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) +declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) +declare <8 x i16> @llvm.s390.vupllb(<16 x i8>) +declare <4 x i32> @llvm.s390.vupllh(<8 x i16>) +declare <2 x i64> @llvm.s390.vupllf(<4 x i32>) +declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.vaq(<16 x i8>, <16 x i8>) 
+declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vavgf(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>) +declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>) +declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>) +declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>) +declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>) +declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>) +declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>) +declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>) 
+declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>) +declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>) +declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>) +declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>) +declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>) +declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32) +declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32) +declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32) +declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32) +declare <16 x i8> 
@llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32) +declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32) +declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32) +declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32) +declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) +declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>) +declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>) +declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>) +declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>) +declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>) +declare {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} 
@llvm.s390.vchfs(<4 x i32>, <4 x i32>) +declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>) +declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>) +declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32) +declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32) +declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32) +declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32) +declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32) +declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32) +declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32) +declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32) +declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32) +declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32) +declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32) +declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32) +declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vfeef(<4 x i32>, <4 x i32>) +declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>) +declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>) +declare <8 
x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>) +declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>) +declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.s390.vistrb(<16 x i8>) +declare <8 x i16> @llvm.s390.vistrh(<8 x i16>) +declare <4 x i32> @llvm.s390.vistrf(<4 x i32>) +declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>) +declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>) +declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>) +declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32) +declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32) +declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32) +declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>, + i32) +declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>, + i32) +declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>, + i32) +declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32) +declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32) +declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32) +declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x i8>, + i32) +declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>, + i32) +declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>, + i32) +declare {<2 x i64>, i32} 
@llvm.s390.vfcedbs(<2 x double>, <2 x double>) +declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>) +declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>) +declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32) +declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32) + +; LCBB with the lowest M3 operand. +define i32 @test_lcbb1(i8 *%ptr) { +; CHECK-LABEL: test_lcbb1: +; CHECK: lcbb %r2, 0(%r2), 0 +; CHECK: br %r14 + %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0) + ret i32 %res +} + +; LCBB with the highest M3 operand. +define i32 @test_lcbb2(i8 *%ptr) { +; CHECK-LABEL: test_lcbb2: +; CHECK: lcbb %r2, 0(%r2), 15 +; CHECK: br %r14 + %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15) + ret i32 %res +} + +; LCBB with a displacement and index. +define i32 @test_lcbb3(i8 *%base, i64 %index) { +; CHECK-LABEL: test_lcbb3: +; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4 +; CHECK: br %r14 + %add = add i64 %index, 4095 + %ptr = getelementptr i8, i8 *%base, i64 %add + %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4) + ret i32 %res +} + +; LCBB with an out-of-range displacement. +define i32 @test_lcbb4(i8 *%base) { +; CHECK-LABEL: test_lcbb4: +; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5 +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4096 + %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5) + ret i32 %res +} + +; VLBB with the lowest M3 operand. +define <16 x i8> @test_vlbb1(i8 *%ptr) { +; CHECK-LABEL: test_vlbb1: +; CHECK: vlbb %v24, 0(%r2), 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0) + ret <16 x i8> %res +} + +; VLBB with the highest M3 operand. +define <16 x i8> @test_vlbb2(i8 *%ptr) { +; CHECK-LABEL: test_vlbb2: +; CHECK: vlbb %v24, 0(%r2), 15 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15) + ret <16 x i8> %res +} + +; VLBB with a displacement and index. 
+define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) { +; CHECK-LABEL: test_vlbb3: +; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4 +; CHECK: br %r14 + %add = add i64 %index, 4095 + %ptr = getelementptr i8, i8 *%base, i64 %add + %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4) + ret <16 x i8> %res +} + +; VLBB with an out-of-range displacement. +define <16 x i8> @test_vlbb4(i8 *%base) { +; CHECK-LABEL: test_vlbb4: +; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5 +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4096 + %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5) + ret <16 x i8> %res +} + +; VLL with the lowest in-range displacement. +define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) { +; CHECK-LABEL: test_vll1: +; CHECK: vll %v24, %r3, 0(%r2) +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) + ret <16 x i8> %res +} + +; VLL with the highest in-range displacement. +define <16 x i8> @test_vll2(i8 *%base, i32 %length) { +; CHECK-LABEL: test_vll2: +; CHECK: vll %v24, %r3, 4095(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4095 + %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) + ret <16 x i8> %res +} + +; VLL with an out-of-range displacementa. +define <16 x i8> @test_vll3(i8 *%base, i32 %length) { +; CHECK-LABEL: test_vll3: +; CHECK: vll %v24, %r3, 0({{%r[1-5]}}) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4096 + %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) + ret <16 x i8> %res +} + +; Check that VLL doesn't allow an index. +define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) { +; CHECK-LABEL: test_vll4: +; CHECK: vll %v24, %r4, 0({{%r[1-5]}}) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 %index + %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) + ret <16 x i8> %res +} + +; VPDI taking element 0 from each half. 
+define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpdi1: +; CHECK: vpdi %v24, %v24, %v26, 0 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0) + ret <2 x i64> %res +} + +; VPDI taking element 1 from each half. +define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpdi2: +; CHECK: vpdi %v24, %v24, %v26, 10 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10) + ret <2 x i64> %res +} + +; VPERM. +define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vperm: +; CHECK: vperm %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VPKSH. +define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vpksh: +; CHECK: vpksh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b) + ret <16 x i8> %res +} + +; VPKSF. +define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vpksf: +; CHECK: vpksf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b) + ret <8 x i16> %res +} + +; VPKSG. +define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpksg: +; CHECK: vpksg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b) + ret <4 x i32> %res +} + +; VPKSHS with no processing of the result. 
+define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpkshs: +; CHECK: vpkshs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VPKSHS, storing to %ptr if all values were saturated. +define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) { +; CHECK-LABEL: test_vpkshs_all_store: +; CHECK: vpkshs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <16 x i8> %res +} + +; VPKSFS with no processing of the result. +define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpksfs: +; CHECK: vpksfs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VPKSFS, storing to %ptr if any values were saturated. 
+define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { +; CHECK-LABEL: test_vpksfs_any_store: +; CHECK: vpksfs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <8 x i16> %res +} + +; VPKSGS with no processing of the result. +define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpksgs: +; CHECK: vpksgs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VPKSGS, storing to %ptr if no elements were saturated +define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vpksgs_none_store: +; CHECK: vpksgs %v24, %v24, %v26 +; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp sle i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VPKLSH. +define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vpklsh: +; CHECK: vpklsh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b) + ret <16 x i8> %res +} + +; VPKLSF. 
+define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vpklsf: +; CHECK: vpklsf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b) + ret <8 x i16> %res +} + +; VPKLSG. +define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpklsg: +; CHECK: vpklsg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b) + ret <4 x i32> %res +} + +; VPKLSHS with no processing of the result. +define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpklshs: +; CHECK: vpklshs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VPKLSHS, storing to %ptr if all values were saturated. +define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vpklshs_all_store: +; CHECK: vpklshs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp eq i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <16 x i8> %res +} + +; VPKLSFS with no processing of the result. 
+define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpklsfs: +; CHECK: vpklsfs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VPKLSFS, storing to %ptr if any values were saturated. +define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vpklsfs_any_store: +; CHECK: vpklsfs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp ne i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <8 x i16> %res +} + +; VPKLSGS with no processing of the result. 
+define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vpklsgs: +; CHECK: vpklsgs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VPKLSGS, storing to %ptr if no elements were saturated +define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vpklsgs_none_store: +; CHECK: vpklsgs %v24, %v24, %v26 +; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VSTL with the lowest in-range displacement. +define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) { +; CHECK-LABEL: test_vstl1: +; CHECK: vstl %v24, %r3, 0(%r2) +; CHECK: br %r14 + call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) + ret void +} + +; VSTL with the highest in-range displacement. +define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) { +; CHECK-LABEL: test_vstl2: +; CHECK: vstl %v24, %r3, 4095(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4095 + call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) + ret void +} + +; VSTL with an out-of-range displacement. 
+define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) { +; CHECK-LABEL: test_vstl3: +; CHECK: vstl %v24, %r3, 0({{%r[1-5]}}) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 4096 + call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) + ret void +} + +; Check that VSTL doesn't allow an index. +define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) { +; CHECK-LABEL: test_vstl4: +; CHECK: vstl %v24, %r4, 0({{%r[1-5]}}) +; CHECK: br %r14 + %ptr = getelementptr i8, i8 *%base, i64 %index + call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) + ret void +} + +; VUPHB. +define <8 x i16> @test_vuphb(<16 x i8> %a) { +; CHECK-LABEL: test_vuphb: +; CHECK: vuphb %v24, %v24 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a) + ret <8 x i16> %res +} + +; VUPHH. +define <4 x i32> @test_vuphh(<8 x i16> %a) { +; CHECK-LABEL: test_vuphh: +; CHECK: vuphh %v24, %v24 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a) + ret <4 x i32> %res +} + +; VUPHF. +define <2 x i64> @test_vuphf(<4 x i32> %a) { +; CHECK-LABEL: test_vuphf: +; CHECK: vuphf %v24, %v24 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a) + ret <2 x i64> %res +} + +; VUPLHB. +define <8 x i16> @test_vuplhb(<16 x i8> %a) { +; CHECK-LABEL: test_vuplhb: +; CHECK: vuplhb %v24, %v24 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a) + ret <8 x i16> %res +} + +; VUPLHH. +define <4 x i32> @test_vuplhh(<8 x i16> %a) { +; CHECK-LABEL: test_vuplhh: +; CHECK: vuplhh %v24, %v24 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a) + ret <4 x i32> %res +} + +; VUPLHF. +define <2 x i64> @test_vuplhf(<4 x i32> %a) { +; CHECK-LABEL: test_vuplhf: +; CHECK: vuplhf %v24, %v24 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a) + ret <2 x i64> %res +} + +; VUPLB. 
+define <8 x i16> @test_vuplb(<16 x i8> %a) { +; CHECK-LABEL: test_vuplb: +; CHECK: vuplb %v24, %v24 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a) + ret <8 x i16> %res +} + +; VUPLHW. +define <4 x i32> @test_vuplhw(<8 x i16> %a) { +; CHECK-LABEL: test_vuplhw: +; CHECK: vuplhw %v24, %v24 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a) + ret <4 x i32> %res +} + +; VUPLF. +define <2 x i64> @test_vuplf(<4 x i32> %a) { +; CHECK-LABEL: test_vuplf: +; CHECK: vuplf %v24, %v24 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a) + ret <2 x i64> %res +} + +; VUPLLB. +define <8 x i16> @test_vupllb(<16 x i8> %a) { +; CHECK-LABEL: test_vupllb: +; CHECK: vupllb %v24, %v24 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a) + ret <8 x i16> %res +} + +; VUPLLH. +define <4 x i32> @test_vupllh(<8 x i16> %a) { +; CHECK-LABEL: test_vupllh: +; CHECK: vupllh %v24, %v24 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a) + ret <4 x i32> %res +} + +; VUPLLF. +define <2 x i64> @test_vupllf(<4 x i32> %a) { +; CHECK-LABEL: test_vupllf: +; CHECK: vupllf %v24, %v24 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a) + ret <2 x i64> %res +} + +; VACCB. +define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vaccb: +; CHECK: vaccb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VACCH. +define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vacch: +; CHECK: vacch %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VACCF. 
+define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vaccf: +; CHECK: vaccf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VACCG. +define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vaccg: +; CHECK: vaccg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %res +} + +; VAQ. +define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vaq: +; CHECK: vaq %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VACQ. +define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vacq: +; CHECK: vacq %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VACCQ. +define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vaccq: +; CHECK: vaccq %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VACCCQ. +define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vacccq: +; CHECK: vacccq %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VAVGB. +define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vavgb: +; CHECK: vavgb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VAVGH. 
+define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vavgh: +; CHECK: vavgh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VAVGF. +define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vavgf: +; CHECK: vavgf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VAVGG. +define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vavgg: +; CHECK: vavgg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %res +} + +; VAVGLB. +define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vavglb: +; CHECK: vavglb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VAVGLH. +define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vavglh: +; CHECK: vavglh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VAVGLF. +define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vavglf: +; CHECK: vavglf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VAVGLG. +define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vavglg: +; CHECK: vavglg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %res +} + +; VCKSM. +define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vcksm: +; CHECK: vcksm %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VGFMB. 
+define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vgfmb: +; CHECK: vgfmb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +; VGFMH. +define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vgfmh: +; CHECK: vgfmh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VGFMF. +define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vgfmf: +; CHECK: vgfmf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VGFMG. +define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vgfmg: +; CHECK: vgfmg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b) + ret <16 x i8> %res +} + +; VGFMAB. +define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vgfmab: +; CHECK: vgfmab %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VGFMAH. +define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vgfmah: +; CHECK: vgfmah %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VGFMAF. +define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vgfmaf: +; CHECK: vgfmaf %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b, + <2 x i64> %c) + ret <2 x i64> %res +} + +; VGFMAG. 
+define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vgfmag: +; CHECK: vgfmag %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VMAHB. +define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vmahb: +; CHECK: vmahb %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VMAHH. +define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmahh: +; CHECK: vmahh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMAHF. +define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmahf: +; CHECK: vmahf %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMALHB. +define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vmalhb: +; CHECK: vmalhb %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VMALHH. +define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmalhh: +; CHECK: vmalhh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMALHF. +define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmalhf: +; CHECK: vmalhf %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMAEB. 
+define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmaeb: +; CHECK: vmaeb %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMAEH. +define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmaeh: +; CHECK: vmaeh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMAEF. +define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vmaef: +; CHECK: vmaef %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b, + <2 x i64> %c) + ret <2 x i64> %res +} + +; VMALEB. +define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmaleb: +; CHECK: vmaleb %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMALEH. +define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmaleh: +; CHECK: vmaleh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMALEF. +define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vmalef: +; CHECK: vmalef %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b, + <2 x i64> %c) + ret <2 x i64> %res +} + +; VMAOB. +define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmaob: +; CHECK: vmaob %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMAOH. 
+define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmaoh: +; CHECK: vmaoh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMAOF. +define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vmaof: +; CHECK: vmaof %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b, + <2 x i64> %c) + ret <2 x i64> %res +} + +; VMALOB. +define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vmalob: +; CHECK: vmalob %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b, + <8 x i16> %c) + ret <8 x i16> %res +} + +; VMALOH. +define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vmaloh: +; CHECK: vmaloh %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b, + <4 x i32> %c) + ret <4 x i32> %res +} + +; VMALOF. +define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vmalof: +; CHECK: vmalof %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b, + <2 x i64> %c) + ret <2 x i64> %res +} + +; VMHB. +define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmhb: +; CHECK: vmhb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VMHH. +define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmhh: +; CHECK: vmhh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VMHF. 
+define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmhf: +; CHECK: vmhf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VMLHB. +define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmlhb: +; CHECK: vmlhb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VMLHH. +define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmlhh: +; CHECK: vmlhh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VMLHF. +define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmlhf: +; CHECK: vmlhf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VMEB. +define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmeb: +; CHECK: vmeb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +; VMEH. +define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmeh: +; CHECK: vmeh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VMEF. +define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmef: +; CHECK: vmef %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VMLEB. +define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmleb: +; CHECK: vmleb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +; VMLEH. 
+define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmleh: +; CHECK: vmleh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VMLEF. +define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmlef: +; CHECK: vmlef %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VMOB. +define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmob: +; CHECK: vmob %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +; VMOH. +define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmoh: +; CHECK: vmoh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VMOF. +define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmof: +; CHECK: vmof %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VMLOB. +define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmlob: +; CHECK: vmlob %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +; VMLOH. +define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vmloh: +; CHECK: vmloh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VMLOF. +define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmlof: +; CHECK: vmlof %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VERLLVB. 
+define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_verllvb: +; CHECK: verllvb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VERLLVH. +define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_verllvh: +; CHECK: verllvh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VERLLVF. +define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_verllvf: +; CHECK: verllvf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VERLLVG. +define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_verllvg: +; CHECK: verllvg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %res +} + +; VERLLB. +define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) { +; CHECK-LABEL: test_verllb: +; CHECK: verllb %v24, %v24, 0(%r2) +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b) + ret <16 x i8> %res +} + +; VERLLH. +define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) { +; CHECK-LABEL: test_verllh: +; CHECK: verllh %v24, %v24, 0(%r2) +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b) + ret <8 x i16> %res +} + +; VERLLF. +define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: test_verllf: +; CHECK: verllf %v24, %v24, 0(%r2) +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b) + ret <4 x i32> %res +} + +; VERLLG. +define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) { +; CHECK-LABEL: test_verllg: +; CHECK: verllg %v24, %v24, 0(%r2) +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b) + ret <2 x i64> %res +} + +; VERLLB with the smallest count. 
+define <16 x i8> @test_verllb_1(<16 x i8> %a) { +; CHECK-LABEL: test_verllb_1: +; CHECK: verllb %v24, %v24, 1 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1) + ret <16 x i8> %res +} + +; VERLLB with the largest count. +define <16 x i8> @test_verllb_4095(<16 x i8> %a) { +; CHECK-LABEL: test_verllb_4095: +; CHECK: verllb %v24, %v24, 4095 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095) + ret <16 x i8> %res +} + +; VERLLB with the largest count + 1. +define <16 x i8> @test_verllb_4096(<16 x i8> %a) { +; CHECK-LABEL: test_verllb_4096: +; CHECK: lhi [[REG:%r[1-5]]], 4096 +; CHECK: verllb %v24, %v24, 0([[REG]]) +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096) + ret <16 x i8> %res +} + +; VERIMB. +define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_verimb: +; CHECK: verimb %v24, %v26, %v28, 1 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1) + ret <16 x i8> %res +} + +; VERIMH. +define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_verimh: +; CHECK: verimh %v24, %v26, %v28, 1 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1) + ret <8 x i16> %res +} + +; VERIMF. +define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_verimf: +; CHECK: verimf %v24, %v26, %v28, 1 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1) + ret <4 x i32> %res +} + +; VERIMG. +define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_verimg: +; CHECK: verimg %v24, %v26, %v28, 1 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1) + ret <2 x i64> %res +} + +; VERIMB with a different mask. 
+define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_verimb_254: +; CHECK: verimb %v24, %v26, %v28, 254 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254) + ret <16 x i8> %res +} + +; VSL. +define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsl: +; CHECK: vsl %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSLB. +define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vslb: +; CHECK: vslb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSRA. +define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsra: +; CHECK: vsra %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSRAB. +define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsrab: +; CHECK: vsrab %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSRL. +define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsrl: +; CHECK: vsrl %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSRLB. +define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsrlb: +; CHECK: vsrlb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSLDB with the minimum useful value. 
+define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsldb_1: +; CHECK: vsldb %v24, %v24, %v26, 1 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %res +} + +; VSLDB with the maximum value. +define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsldb_15: +; CHECK: vsldb %v24, %v24, %v26, 15 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15) + ret <16 x i8> %res +} + +; VSCBIB. +define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vscbib: +; CHECK: vscbib %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSCBIH. +define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vscbih: +; CHECK: vscbih %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VSCBIF. +define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vscbif: +; CHECK: vscbif %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VSCBIG. +define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vscbig: +; CHECK: vscbig %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %res +} + +; VSQ. +define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsq: +; CHECK: vsq %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSBIQ. 
+define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vsbiq: +; CHECK: vsbiq %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VSCBIQ. +define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vscbiq: +; CHECK: vscbiq %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VSBCBIQ. +define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vsbcbiq: +; CHECK: vsbcbiq %v24, %v24, %v26, %v28 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + ret <16 x i8> %res +} + +; VSUMB. +define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsumb: +; CHECK: vsumb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b) + ret <4 x i32> %res +} + +; VSUMH. +define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vsumh: +; CHECK: vsumh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b) + ret <4 x i32> %res +} + +; VSUMGH. +define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vsumgh: +; CHECK: vsumgh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b) + ret <2 x i64> %res +} + +; VSUMGF. +define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsumgf: +; CHECK: vsumgf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b) + ret <2 x i64> %res +} + +; VSUMQF. 
+define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsumqf: +; CHECK: vsumqf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b) + ret <16 x i8> %res +} + +; VSUMQG. +define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsumqg: +; CHECK: vsumqg %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b) + ret <16 x i8> %res +} + +; VTM with no processing of the result. +define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vtm: +; CHECK: vtm %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) + ret i32 %res +} + +; VTM, storing to %ptr if all bits are set. +define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { +; CHECK-LABEL: test_vtm_all_store: +; CHECK-NOT: %r +; CHECK: vtm %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) + %cmp = icmp sge i32 %res, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret void +} + +; VCEQBS with no processing of the result. +define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vceqbs: +; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + ret i32 %res +} + +; VCEQBS, returning 1 if any elements are equal (CC != 3). 
+define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vceqbs_any_bool: +; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCEQBS, storing to %ptr if any elements are equal. +define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { +; CHECK-LABEL: test_vceqbs_any_store: +; CHECK-NOT: %r +; CHECK: vceqbs %v24, %v24, %v26 +; CHECK-NEXT: {{jo|jnle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <16 x i8> %res +} + +; VCEQHS with no processing of the result. +define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vceqhs: +; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + ret i32 %res +} + +; VCEQHS, returning 1 if not all elements are equal. 
+define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vceqhs_notall_bool: +; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp sge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCEQHS, storing to %ptr if not all elements are equal. +define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vceqhs_notall_store: +; CHECK-NOT: %r +; CHECK: vceqhs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <8 x i16> %res +} + +; VCEQFS with no processing of the result. +define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vceqfs: +; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VCEQFS, returning 1 if no elements are equal. 
+define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vceqfs_none_bool: +; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCEQFS, storing to %ptr if no elements are equal. +define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vceqfs_none_store: +; CHECK-NOT: %r +; CHECK: vceqfs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VCEQGS with no processing of the result. +define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vceqgs: +; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VCEQGS returning 1 if all elements are equal (CC == 0). 
+define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vceqgs_all_bool: +; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -268435456 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ult i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCEQGS, storing to %ptr if all elements are equal. +define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { +; CHECK-LABEL: test_vceqgs_all_store: +; CHECK-NOT: %r +; CHECK: vceqgs %v24, %v24, %v26 +; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp sle i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VCHBS with no processing of the result. +define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vchbs: +; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + ret i32 %res +} + +; VCHBS, returning 1 if any elements are higher (CC != 3). +define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vchbs_any_bool: +; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHBS, storing to %ptr if any elements are higher. 
+define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { +; CHECK-LABEL: test_vchbs_any_store: +; CHECK-NOT: %r +; CHECK: vchbs %v24, %v24, %v26 +; CHECK-NEXT: {{jo|jnle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <16 x i8> %res +} + +; VCHHS with no processing of the result. +define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vchhs: +; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + ret i32 %res +} + +; VCHHS, returning 1 if not all elements are higher. +define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vchhs_notall_bool: +; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp sge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHHS, storing to %ptr if not all elements are higher. 
+define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vchhs_notall_store: +; CHECK-NOT: %r +; CHECK: vchhs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <8 x i16> %res +} + +; VCHFS with no processing of the result. +define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vchfs: +; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VCHFS, returning 1 if no elements are higher. +define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vchfs_none_bool: +; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHFS, storing to %ptr if no elements are higher. 
+define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { +; CHECK-LABEL: test_vchfs_none_store: +; CHECK-NOT: %r +; CHECK: vchfs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VCHGS with no processing of the result. +define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vchgs: +; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VCHGS returning 1 if all elements are higher (CC == 0). +define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vchgs_all_bool: +; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -268435456 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ult i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHGS, storing to %ptr if all elements are higher. 
+define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { +; CHECK-LABEL: test_vchgs_all_store: +; CHECK-NOT: %r +; CHECK: vchgs %v24, %v24, %v26 +; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp sle i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VCHLBS with no processing of the result. +define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vchlbs: +; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + ret i32 %res +} + +; VCHLBS, returning 1 if any elements are higher (CC != 3). +define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vchlbs_any_bool: +; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHLBS, storing to %ptr if any elements are higher. 
+define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { +; CHECK-LABEL: test_vchlbs_any_store: +; CHECK-NOT: %r +; CHECK: vchlbs %v24, %v24, %v26 +; CHECK-NEXT: {{jo|jnle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + %cmp = icmp sle i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <16 x i8> %res +} + +; VCHLHS with no processing of the result. +define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vchlhs: +; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + ret i32 %res +} + +; VCHLHS, returning 1 if not all elements are higher. +define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vchlhs_notall_bool: +; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp uge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHLHS, storing to %ptr if not all elements are higher. 
+define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vchlhs_notall_store: +; CHECK-NOT: %r +; CHECK: vchlhs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %cmp = icmp sgt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <8 x i16> %res +} + +; VCHLFS with no processing of the result. +define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vchlfs: +; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VCHLFS, returning 1 if no elements are higher. +define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vchlfs_none_bool: +; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHLFS, storing to %ptr if no elements are higher. 
+define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vchlfs_none_store: +; CHECK-NOT: %r +; CHECK: vchlfs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp sge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VCHLGS with no processing of the result. +define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vchlgs: +; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VCHLGS returning 1 if all elements are higher (CC == 0). +define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vchlgs_all_bool: +; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -268435456 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp slt i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VCHLGS, storing to %ptr if all elements are higher. 
+define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { +; CHECK-LABEL: test_vchlgs_all_store: +; CHECK-NOT: %r +; CHECK: vchlgs %v24, %v24, %v26 +; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VFAEB with !IN !RT. +define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaeb_0: +; CHECK: vfaeb %v24, %v24, %v26, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0) + ret <16 x i8> %res +} + +; VFAEB with !IN RT. +define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaeb_4: +; CHECK: vfaeb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4) + ret <16 x i8> %res +} + +; VFAEB with IN !RT. +define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaeb_8: +; CHECK: vfaeb %v24, %v24, %v26, 8 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8) + ret <16 x i8> %res +} + +; VFAEB with IN RT. +define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaeb_12: +; CHECK: vfaeb %v24, %v24, %v26, 12 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12) + ret <16 x i8> %res +} + +; VFAEB with CS -- should be ignored. +define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaeb_1: +; CHECK: vfaeb %v24, %v24, %v26, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %res +} + +; VFAEH. 
+define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfaeh: +; CHECK: vfaeh %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4) + ret <8 x i16> %res +} + +; VFAEF. +define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfaef: +; CHECK: vfaef %v24, %v24, %v26, 8 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8) + ret <4 x i32> %res +} + +; VFAEBS. +define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaebs: +; CHECK: vfaebs %v24, %v24, %v26, 0 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b, + i32 0) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFAEHS. +define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaehs: +; CHECK: vfaehs %v24, %v24, %v26, 4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b, + i32 4) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFAEFS. +define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaefs: +; CHECK: vfaefs %v24, %v24, %v26, 8 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b, + i32 8) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFAEZB with !IN !RT. 
+define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaezb_0: +; CHECK: vfaezb %v24, %v24, %v26, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0) + ret <16 x i8> %res +} + +; VFAEZB with !IN RT. +define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaezb_4: +; CHECK: vfaezb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4) + ret <16 x i8> %res +} + +; VFAEZB with IN !RT. +define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaezb_8: +; CHECK: vfaezb %v24, %v24, %v26, 8 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8) + ret <16 x i8> %res +} + +; VFAEZB with IN RT. +define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaezb_12: +; CHECK: vfaezb %v24, %v24, %v26, 12 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12) + ret <16 x i8> %res +} + +; VFAEZB with CS -- should be ignored. +define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfaezb_1: +; CHECK: vfaezb %v24, %v24, %v26, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %res +} + +; VFAEZH. +define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfaezh: +; CHECK: vfaezh %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4) + ret <8 x i16> %res +} + +; VFAEZF. +define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfaezf: +; CHECK: vfaezf %v24, %v24, %v26, 8 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8) + ret <4 x i32> %res +} + +; VFAEZBS. 
+define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaezbs: +; CHECK: vfaezbs %v24, %v24, %v26, 0 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b, + i32 0) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFAEZHS. +define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaezhs: +; CHECK: vfaezhs %v24, %v24, %v26, 4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b, + i32 4) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFAEZFS. +define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfaezfs: +; CHECK: vfaezfs %v24, %v24, %v26, 8 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b, + i32 8) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFEEB. +define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfeeb_0: +; CHECK: vfeeb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VFEEH. +define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfeeh: +; CHECK: vfeeh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VFEEF. 
+define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfeef: +; CHECK: vfeef %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VFEEBS. +define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeebs: +; CHECK: vfeebs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFEEHS. +define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeehs: +; CHECK: vfeehs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFEEFS. +define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeefs: +; CHECK: vfeefs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFEEZB. +define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfeezb: +; CHECK: vfeezb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VFEEZH. 
+define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfeezh: +; CHECK: vfeezh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VFEEZF. +define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfeezf: +; CHECK: vfeezf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VFEEZBS. +define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeezbs: +; CHECK: vfeezbs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFEEZHS. +define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeezhs: +; CHECK: vfeezhs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFEEZFS. +define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfeezfs: +; CHECK: vfeezfs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFENEB. 
+define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfeneb_0: +; CHECK: vfeneb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VFENEH. +define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfeneh: +; CHECK: vfeneh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VFENEF. +define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfenef: +; CHECK: vfenef %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VFENEBS. +define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenebs: +; CHECK: vfenebs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFENEHS. +define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenehs: +; CHECK: vfenehs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFENEFS. 
+define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenefs: +; CHECK: vfenefs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFENEZB. +define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vfenezb: +; CHECK: vfenezb %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +; VFENEZH. +define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vfenezh: +; CHECK: vfenezh %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %res +} + +; VFENEZF. +define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vfenezf: +; CHECK: vfenezf %v24, %v24, %v26 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + +; VFENEZBS. +define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenezbs: +; CHECK: vfenezbs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VFENEZHS. 
+define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenezhs: +; CHECK: vfenezhs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VFENEZFS. +define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { +; CHECK-LABEL: test_vfenezfs: +; CHECK: vfenezfs %v24, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VISTRB. +define <16 x i8> @test_vistrb(<16 x i8> %a) { +; CHECK-LABEL: test_vistrb: +; CHECK: vistrb %v24, %v24 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a) + ret <16 x i8> %res +} + +; VISTRH. +define <8 x i16> @test_vistrh(<8 x i16> %a) { +; CHECK-LABEL: test_vistrh: +; CHECK: vistrh %v24, %v24 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a) + ret <8 x i16> %res +} + +; VISTRF. +define <4 x i32> @test_vistrf(<4 x i32> %a) { +; CHECK-LABEL: test_vistrf: +; CHECK: vistrf %v24, %v24 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a) + ret <4 x i32> %res +} + +; VISTRBS. 
+define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) { +; CHECK-LABEL: test_vistrbs: +; CHECK: vistrbs %v24, %v24 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VISTRHS. +define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) { +; CHECK-LABEL: test_vistrhs: +; CHECK: vistrhs %v24, %v24 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VISTRFS. +define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) { +; CHECK-LABEL: test_vistrfs: +; CHECK: vistrfs %v24, %v24 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VSTRCB with !IN !RT. +define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrcb_0: +; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 0) + ret <16 x i8> %res +} + +; VSTRCB with !IN RT. +define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrcb_4: +; CHECK: vstrcb %v24, %v24, %v26, %v28, 4 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 4) + ret <16 x i8> %res +} + +; VSTRCB with IN !RT. 
+define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrcb_8: +; CHECK: vstrcb %v24, %v24, %v26, %v28, 8 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 8) + ret <16 x i8> %res +} + +; VSTRCB with IN RT. +define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrcb_12: +; CHECK: vstrcb %v24, %v24, %v26, %v28, 12 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 12) + ret <16 x i8> %res +} + +; VSTRCB with CS -- should be ignored. +define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrcb_1: +; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 1) + ret <16 x i8> %res +} + +; VSTRCH. +define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vstrch: +; CHECK: vstrch %v24, %v24, %v26, %v28, 4 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c, i32 4) + ret <8 x i16> %res +} + +; VSTRCF. +define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vstrcf: +; CHECK: vstrcf %v24, %v24, %v26, %v28, 8 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c, i32 8) + ret <4 x i32> %res +} + +; VSTRCBS. 
+define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrcbs: +; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 0) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VSTRCHS. +define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrchs: +; CHECK: vstrchs %v24, %v24, %v26, %v28, 4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c, i32 4) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VSTRCFS. +define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrcfs: +; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c, i32 8) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VSTRCZB with !IN !RT. +define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrczb_0: +; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 0) + ret <16 x i8> %res +} + +; VSTRCZB with !IN RT. 
+define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrczb_4: +; CHECK: vstrczb %v24, %v24, %v26, %v28, 4 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 4) + ret <16 x i8> %res +} + +; VSTRCZB with IN !RT. +define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrczb_8: +; CHECK: vstrczb %v24, %v24, %v26, %v28, 8 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 8) + ret <16 x i8> %res +} + +; VSTRCZB with IN RT. +define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrczb_12: +; CHECK: vstrczb %v24, %v24, %v26, %v28, 12 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 12) + ret <16 x i8> %res +} + +; VSTRCZB with CS -- should be ignored. +define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_vstrczb_1: +; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 +; CHECK: br %r14 + %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 1) + ret <16 x i8> %res +} + +; VSTRCZH. +define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_vstrczh: +; CHECK: vstrczh %v24, %v24, %v26, %v28, 4 +; CHECK: br %r14 + %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c, i32 4) + ret <8 x i16> %res +} + +; VSTRCZF. +define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vstrczf: +; CHECK: vstrczf %v24, %v24, %v26, %v28, 8 +; CHECK: br %r14 + %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c, i32 8) + ret <4 x i32> %res +} + +; VSTRCZBS. 
+define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrczbs: +; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c, i32 0) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VSTRCZHS. +define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrczhs: +; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b, + <8 x i16> %c, i32 4) + %res = extractvalue {<8 x i16>, i32} %call, 0 + %cc = extractvalue {<8 x i16>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <8 x i16> %res +} + +; VSTRCZFS. +define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrczfs: +; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b, + <4 x i32> %c, i32 8) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <4 x i32> %res +} + +; VFCEDBS with no processing of the result. 
+define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfcedbs: +; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VFCEDBS, returning 1 if any elements are equal (CC != 3). +define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfcedbs_any_bool: +; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCEDBS, storing to %ptr if any elements are equal. +define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfcedbs_any_store: +; CHECK-NOT: %r +; CHECK: vfcedbs %v24, %v24, %v26 +; CHECK-NEXT: {{jo|jnle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VFCHDBS with no processing of the result. +define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfchdbs: +; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VFCHDBS, returning 1 if not all elements are higher. 
+define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfchdbs_notall_bool: +; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp sge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHDBS, storing to %ptr if not all elements are higher. +define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchdbs_notall_store: +; CHECK-NOT: %r +; CHECK: vfchdbs %v24, %v24, %v26 +; CHECK-NEXT: {{jhe|je}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VFCHEDBS with no processing of the result. +define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfchedbs: +; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VFCHEDBS, returning 1 if neither element is higher or equal. 
+define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vfchedbs_none_bool: +; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHEDBS, storing to %ptr if neither element is higher or equal. +define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchedbs_none_store: +; CHECK-NOT: %r +; CHECK: vfchedbs %v24, %v24, %v26 +; CHECK-NEXT: {{jno|jle}} {{\.L*}} +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, + <2 x double> %b) + %res = extractvalue {<2 x i64>, i32} %call, 0 + %cc = extractvalue {<2 x i64>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <2 x i64> %res +} + +; VFTCIDB with the lowest useful class selector and no processing of the result. +define i32 @test_vftcidb(<2 x double> %a) { +; CHECK-LABEL: test_vftcidb: +; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1) + %res = extractvalue {<2 x i64>, i32} %call, 1 + ret i32 %res +} + +; VFTCIDB with the highest useful class selector, returning 1 if all elements +; have the right class (CC == 0). 
+define i32 @test_vftcidb_all_bool(<2 x double> %a) {
+; CHECK-LABEL: test_vftcidb_all_bool:
+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 0
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VFIDB with a rounding mode not usable via standard intrinsics.
+define <2 x double> @test_vfidb_0_4(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_0_4:
+; CHECK: vfidb %v24, %v24, 0, 4
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4)
+  ret <2 x double> %res
+}
+
+; VFIDB with IEEE-inexact exception suppressed.
+define <2 x double> @test_vfidb_4_0(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_4_0:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0)
+  ret <2 x double> %res
+}
+