diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 55e9fc0592d..2782b63a78d 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -118,11 +118,6 @@ such as ICM and STCM. -- -We could make more use of the ROTATE AND ... SELECTED BITS instructions. -At the moment we only use RISBG, and only then for subword atomic operations. - --- - DAGCombiner can detect integer absolute, but there's not yet an associated ISD opcode. We could add one and implement it using LOAD POSITIVE. Negated absolutes could use LOAD NEGATIVE. diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 0f9a37ed0cf..8866253484e 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -97,15 +97,24 @@ static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; } -// Represents operands 2 to 5 of a ROTATE AND ... SELECTED BITS operation. -// The operands are: Input (R2), Start (I3), End (I4) and Rotate (I5). -// The operand value is effectively (and (rotl Input Rotate) Mask) and -// has BitSize bits. +// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation +// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and +// Rotate (I5). The combined operand value is effectively: +// +// (or (rotl Input, Rotate), ~Mask) +// +// for RNSBG and: +// +// (and (rotl Input, Rotate), Mask) +// +// otherwise. The value has BitSize bits. 
struct RxSBGOperands { - RxSBGOperands(SDValue N) - : BitSize(N.getValueType().getSizeInBits()), Mask(allOnes(BitSize)), - Input(N), Start(64 - BitSize), End(63), Rotate(0) {} + RxSBGOperands(unsigned Op, SDValue N) + : Opcode(Op), BitSize(N.getValueType().getSizeInBits()), + Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), + Rotate(0) {} + unsigned Opcode; unsigned BitSize; uint64_t Mask; SDValue Input; @@ -671,6 +680,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { unsigned Opcode = N.getOpcode(); switch (Opcode) { case ISD::AND: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + ConstantSDNode *MaskNode = dyn_cast(N.getOperand(1).getNode()); if (!MaskNode) @@ -692,6 +704,31 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { return true; } + case ISD::OR: { + if (RxSBG.Opcode != SystemZ::RNSBG) + return false; + + ConstantSDNode *MaskNode = + dyn_cast(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = ~MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known ones, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask &= ~KnownOne.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + case ISD::ROTL: { // Any 64-bit rotate left can be merged into the RxSBG. 
if (RxSBG.BitSize != 64) @@ -707,18 +744,26 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { } case ISD::SHL: { - // Treat (shl X, count) as (and (rotl X, count), ~0<(N.getOperand(1).getNode()); if (!CountNode) return false; uint64_t Count = CountNode->getZExtValue(); - if (Count < 1 || - Count >= RxSBG.BitSize || - !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count)) + if (Count < 1 || Count >= RxSBG.BitSize) return false; + if (RxSBG.Opcode == SystemZ::RNSBG) { + // Treat (shl X, count) as (rotl X, count) as long as the bottom + // count bits from RxSBG.Input are ignored. + if (shiftedInBitsMatter(RxSBG, Count, true)) + return false; + } else { + // Treat (shl X, count) as (and (rotl X, count), ~0<= RxSBG.BitSize) return false; - if (Opcode == ISD::SRA) { - // Treat (sra X, count) as (rotl X, size-count) as long as the top - // Count bits from RxSBG.Input are ignored. + if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { + // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top + // count bits from RxSBG.Input are ignored. if (shiftedInBitsMatter(RxSBG, Count, false)) return false; } else { @@ -779,7 +824,7 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { } SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { - RxSBGOperands RISBG(SDValue(N, 0)); + RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); unsigned Count = 0; while (expandRxSBG(RISBG)) Count += 1; @@ -811,7 +856,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // Try treating each operand of N as the second operand of the RxSBG // and see which goes deepest. 
- RxSBGOperands RxSBG[] = { N->getOperand(0), N->getOperand(1) }; + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; unsigned Count[] = { 0, 0 }; for (unsigned I = 0; I < 2; ++I) while (expandRxSBG(RxSBG[I])) @@ -941,6 +989,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { break; case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RNSBG); + // Fall through. case ISD::ROTL: case ISD::SHL: case ISD::SRL: diff --git a/test/CodeGen/SystemZ/rnsbg-01.ll b/test/CodeGen/SystemZ/rnsbg-01.ll new file mode 100644 index 00000000000..666aeb21e8d --- /dev/null +++ b/test/CodeGen/SystemZ/rnsbg-01.ll @@ -0,0 +1,257 @@ +; Test sequences that can use RNSBG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a simple mask, which is a wrap-around case. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: rnsbg %r2, %r3, 59, 56, 0 +; CHECK: br %r14 + %orb = or i32 %b, 96 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: rnsbg %r2, %r3, 59, 56, 0 +; CHECK: br %r14 + %orb = or i64 %b, 96 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case where no wraparound is needed. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: rnsbg %r2, %r3, 58, 61, 0 +; CHECK: br %r14 + %orb = or i32 %b, -61 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: rnsbg %r2, %r3, 58, 61, 0 +; CHECK: br %r14 + %orb = or i64 %b, -61 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with just a left shift. This can't use RNSBG. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: sll {{%r[0-5]}} +; CHECK: nr {{%r[0-5]}} +; CHECK: br %r14 + %shrb = shl i32 %b, 20 + %and = and i32 %a, %shrb + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: sllg {{%r[0-5]}} +; CHECK: ngr {{%r[0-5]}} +; CHECK: br %r14 + %shrb = shl i64 %b, 20 + %and = and i64 %a, %shrb + ret i64 %and +} + +; Test a case with just a rotate. This can't use RNSBG. +define i32 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: rll {{%r[0-5]}} +; CHECK: nr {{%r[0-5]}} +; CHECK: br %r14 + %shlb = shl i32 %b, 22 + %shrb = lshr i32 %b, 10 + %rotlb = or i32 %shlb, %shrb + %and = and i32 %a, %rotlb + ret i32 %and +} + +; ...and again with i64, which can. +define i64 @f9(i64 %a, i64 %b) { +; CHECK-LABEL: f9: +; CHECK: rnsbg %r2, %r3, 0, 63, 44 +; CHECK: br %r14 + %shlb = shl i64 %b, 44 + %shrb = lshr i64 %b, 20 + %rotlb = or i64 %shlb, %shrb + %and = and i64 %a, %rotlb + ret i64 %and +} + +; Test a case with a left shift and OR, where the OR covers all shifted bits. +; We can do the whole thing using RNSBG. +define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: rnsbg %r2, %r3, 32, 56, 7 +; CHECK: br %r14 + %shlb = shl i32 %b, 7 + %orb = or i32 %shlb, 127 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f11(i64 %a, i64 %b) { +; CHECK-LABEL: f11: +; CHECK: rnsbg %r2, %r3, 0, 56, 7 +; CHECK: br %r14 + %shlb = shl i64 %b, 7 + %orb = or i64 %shlb, 127 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a left shift and OR, where the OR doesn't cover all +; shifted bits. We can't use RNSBG for the shift, but we can for the OR +; and AND. +define i32 @f12(i32 %a, i32 %b) { +; CHECK-LABEL: f12: +; CHECK: sll %r3, 7 +; CHECK: rnsbg %r2, %r3, 32, 57, 0 +; CHECK: br %r14 + %shlb = shl i32 %b, 7 + %orb = or i32 %shlb, 63 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f13(i64 %a, i64 %b) { +; CHECK-LABEL: f13: +; CHECK: sllg [[REG:%r[01345]]], %r3, 7 +; CHECK: rnsbg %r2, [[REG]], 0, 57, 0 +; CHECK: br %r14 + %shlb = shl i64 %b, 7 + %orb = or i64 %shlb, 63 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a right shift and OR, where the OR covers all the shifted +; bits. The whole thing can be done using RNSBG. +define i32 @f14(i32 %a, i32 %b) { +; CHECK-LABEL: f14: +; CHECK: rnsbg %r2, %r3, 60, 63, 37 +; CHECK: br %r14 + %shrb = lshr i32 %b, 27 + %orb = or i32 %shrb, -16 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f15(i64 %a, i64 %b) { +; CHECK-LABEL: f15: +; CHECK: rnsbg %r2, %r3, 60, 63, 5 +; CHECK: br %r14 + %shrb = lshr i64 %b, 59 + %orb = or i64 %shrb, -16 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a right shift and OR, where the OR doesn't cover all the +; shifted bits. The shift needs to be done separately, but the OR and AND +; can use RNSBG. +define i32 @f16(i32 %a, i32 %b) { +; CHECK-LABEL: f16: +; CHECK: srl %r3, 29 +; CHECK: rnsbg %r2, %r3, 60, 63, 0 +; CHECK: br %r14 + %shrb = lshr i32 %b, 29 + %orb = or i32 %shrb, -16 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f17(i64 %a, i64 %b) { +; CHECK-LABEL: f17: +; CHECK: srlg [[REG:%r[01345]]], %r3, 61 +; CHECK: rnsbg %r2, [[REG]], 60, 63, 0 +; CHECK: br %r14 + %shrb = lshr i64 %b, 61 + %orb = or i64 %shrb, -16 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a combination involving an ASHR in which the sign bits matter. +; We can't use RNSBG for the ASHR in that case, but we can for the rest. +define i32 @f18(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f18: +; CHECK: sra %r3, 4 +; CHECK: rnsbg %r2, %r3, 32, 62, 1 +; CHECK: br %r14 + %ashrb = ashr i32 %b, 4 + store i32 %ashrb, i32 *%dest + %shlb = shl i32 %ashrb, 1 + %orb = or i32 %shlb, 1 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f19(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f19: +; CHECK: srag [[REG:%r[0145]]], %r3, 34 +; CHECK: rnsbg %r2, [[REG]], 0, 62, 1 +; CHECK: br %r14 + %ashrb = ashr i64 %b, 34 + store i64 %ashrb, i64 *%dest + %shlb = shl i64 %ashrb, 1 + %orb = or i64 %shlb, 1 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a combination involving an ASHR in which the sign bits don't matter. +define i32 @f20(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f20: +; CHECK: rnsbg %r2, %r3, 48, 62, 48 +; CHECK: br %r14 + %ashrb = ashr i32 %b, 17 + store i32 %ashrb, i32 *%dest + %shlb = shl i32 %ashrb, 1 + %orb = or i32 %shlb, -65535 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f21(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f21: +; CHECK: rnsbg %r2, %r3, 48, 62, 16 +; CHECK: br %r14 + %ashrb = ashr i64 %b, 49 + store i64 %ashrb, i64 *%dest + %shlb = shl i64 %ashrb, 1 + %orb = or i64 %shlb, -65535 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a shift, OR, and rotate where the OR covers all shifted bits. +define i64 @f22(i64 %a, i64 %b) { +; CHECK-LABEL: f22: +; CHECK: rnsbg %r2, %r3, 60, 54, 9 +; CHECK: br %r14 + %shlb = shl i64 %b, 5 + %orb = or i64 %shlb, 31 + %shlorb = shl i64 %orb, 4 + %shrorb = lshr i64 %orb, 60 + %rotlorb = or i64 %shlorb, %shrorb + %and = and i64 %a, %rotlorb + ret i64 %and +}