mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 17:39:16 +00:00
[SystemZ] Use POPCNT instruction on z196
We already exploit a number of instructions specific to z196, but not yet POPCNT. Add support for the population-count facility, MC support for the POPCNT instruction, CodeGen support for using POPCNT, and implement the getPopcntSupport TargetTransformInfo hook. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233689 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
64aa9d8b4c
commit
ee84973420
@ -163,8 +163,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
|
||||
// available, or if the operand is constant.
|
||||
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
|
||||
|
||||
// Use POPCNT on z196 and above.
|
||||
if (Subtarget.hasPopulationCount())
|
||||
setOperationAction(ISD::CTPOP, VT, Custom);
|
||||
else
|
||||
setOperationAction(ISD::CTPOP, VT, Expand);
|
||||
|
||||
// No special instructions for these.
|
||||
setOperationAction(ISD::CTPOP, VT, Expand);
|
||||
setOperationAction(ISD::CTTZ, VT, Expand);
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
|
||||
@ -2304,6 +2309,45 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT::i64, HighOp, Low32);
|
||||
}
|
||||
|
||||
// Lower a CTPOP node using the POPCNT instruction.  POPCNT produces a
// separate bit count for each byte of its 64-bit operand, so the per-byte
// counts are summed afterwards with a shift-and-add tree; the total ends up
// in the most significant byte that is still in use and is then shifted
// down.  High parts of the operand that are known to be zero are skipped
// in order to shorten the tree.
//
// Op is the CTPOP node; the returned value has the same type as Op.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int64_t OrigBitSize = VT.getSizeInBits();
  SDLoc DL(Op);

  // Get the known-zero mask for the operand.
  Op = Op.getOperand(0);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(Op, KnownZero, KnownOne);
  uint64_t Mask = ~KnownZero.getZExtValue();

  // Skip known-zero high parts of the operand.  Stop as soon as a single
  // byte is left: everything below treats all widths of 8 bits or less the
  // same way, and halving further would never terminate for an operand
  // whose only possibly-set bit is bit 0 (or that is entirely known zero),
  // because the tested half-width mask degenerates to zero once BitSize
  // reaches 1.
  int64_t BitSize = OrigBitSize;
  while (BitSize > 8 &&
         (Mask & ((((uint64_t)1 << (BitSize / 2)) - 1) << (BitSize / 2))) == 0)
    BitSize = BitSize / 2;

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
    // When only part of the register is in use, mask the shifted value so
    // that nothing spills into the bits above BitSize.
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));

  return Op;
}
|
||||
|
||||
// Op is an atomic load. Lower it into a normal volatile load.
|
||||
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -2554,6 +2598,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
|
||||
return lowerUDIVREM(Op, DAG);
|
||||
case ISD::OR:
|
||||
return lowerOR(Op, DAG);
|
||||
case ISD::CTPOP:
|
||||
return lowerCTPOP(Op, DAG);
|
||||
case ISD::ATOMIC_SWAP:
|
||||
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
|
||||
case ISD::ATOMIC_STORE:
|
||||
|
@ -87,6 +87,9 @@ enum {
|
||||
// the number of the register.
|
||||
EXTRACT_ACCESS,
|
||||
|
||||
// Count number of bits set in operand 0 per byte.
|
||||
POPCNT,
|
||||
|
||||
// Wrappers around the ISD opcodes of the same name. The output and
|
||||
// first input operands are GR128s. The trailing numbers are the
|
||||
// widths of the second operand in bits.
|
||||
@ -304,6 +307,7 @@ private:
|
||||
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
|
||||
|
@ -1382,6 +1382,13 @@ let Defs = [CC] in {
|
||||
def : Pat<(ctlz GR64:$src),
|
||||
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
|
||||
|
||||
// Population count. Counts bits set per byte.
|
||||
let Predicates = [FeaturePopulationCount], Defs = [CC] in {
|
||||
def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
|
||||
"popcnt\t$R1, $R2",
|
||||
[(set GR64:$R1, (z_popcnt GR64:$R2))]>;
|
||||
}
|
||||
|
||||
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
|
||||
def : Pat<(i64 (anyext GR32:$src)),
|
||||
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
|
||||
|
@ -121,6 +121,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
|
||||
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
|
||||
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
|
||||
SDT_ZExtractAccess>;
|
||||
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
|
||||
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
|
||||
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
|
||||
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
|
||||
|
@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
|
||||
"Assume that the floating-point extension facility is installed"
|
||||
>;
|
||||
|
||||
def FeaturePopulationCount : SystemZFeature<
|
||||
"population-count", "PopulationCount",
|
||||
"Assume that the population-count facility is installed"
|
||||
>;
|
||||
|
||||
def FeatureFastSerialization : SystemZFeature<
|
||||
"fast-serialization", "FastSerialization",
|
||||
"Assume that the fast-serialization facility is installed"
|
||||
@ -54,9 +59,9 @@ def : Processor<"generic", NoItineraries, []>;
|
||||
def : Processor<"z10", NoItineraries, []>;
|
||||
def : Processor<"z196", NoItineraries,
|
||||
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
|
||||
FeatureFPExtension, FeatureFastSerialization,
|
||||
FeatureInterlockedAccess1]>;
|
||||
FeatureFPExtension, FeaturePopulationCount,
|
||||
FeatureFastSerialization, FeatureInterlockedAccess1]>;
|
||||
def : Processor<"zEC12", NoItineraries,
|
||||
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
|
||||
FeatureFPExtension, FeatureFastSerialization,
|
||||
FeatureInterlockedAccess1]>;
|
||||
FeatureFPExtension, FeaturePopulationCount,
|
||||
FeatureFastSerialization, FeatureInterlockedAccess1]>;
|
||||
|
@ -38,7 +38,8 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
|
||||
const TargetMachine &TM)
|
||||
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
|
||||
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
|
||||
HasFastSerialization(false), HasInterlockedAccess1(false),
|
||||
HasPopulationCount(false), HasFastSerialization(false),
|
||||
HasInterlockedAccess1(false),
|
||||
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
|
||||
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
|
||||
|
||||
|
@ -38,6 +38,7 @@ protected:
|
||||
bool HasLoadStoreOnCond;
|
||||
bool HasHighWord;
|
||||
bool HasFPExtension;
|
||||
bool HasPopulationCount;
|
||||
bool HasFastSerialization;
|
||||
bool HasInterlockedAccess1;
|
||||
|
||||
@ -86,6 +87,9 @@ public:
|
||||
// Return true if the target has the floating-point extension facility.
|
||||
bool hasFPExtension() const { return HasFPExtension; }
|
||||
|
||||
// Return true if the target has the population-count facility.
|
||||
bool hasPopulationCount() const { return HasPopulationCount; }
|
||||
|
||||
// Return true if the target has the fast-serialization facility.
|
||||
bool hasFastSerialization() const { return HasFastSerialization; }
|
||||
|
||||
|
@ -229,3 +229,12 @@ unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
|
||||
}
|
||||
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
|
||||
}
|
||||
|
||||
// Report how well population count is supported for integers that are
// TyWidth bits wide.  Fast hardware support is claimed only when the
// subtarget has the population-count facility and the type is at most
// 64 bits wide; every other case is reported as a software expansion.
TargetTransformInfo::PopcntSupportKind
SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
  const bool UsesPopcnt = TyWidth <= 64 && ST->hasPopulationCount();
  return UsesPopcnt ? TTI::PSK_FastHardware : TTI::PSK_Software;
}
|
||||
|
||||
|
@ -60,6 +60,8 @@ public:
|
||||
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
|
||||
Type *Ty);
|
||||
|
||||
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
|
||||
|
||||
/// @}
|
||||
};
|
||||
|
||||
|
96
test/CodeGen/SystemZ/ctpop-01.ll
Normal file
96
test/CodeGen/SystemZ/ctpop-01.ll
Normal file
@ -0,0 +1,96 @@
|
||||
; Test population-count instruction
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32 %a)
|
||||
declare i64 @llvm.ctpop.i64(i64 %a)
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: popcnt %r0, %r2
|
||||
; CHECK: sllk %r1, %r0, 16
|
||||
; CHECK: ar %r1, %r0
|
||||
; CHECK: sllk %r2, %r1, 8
|
||||
; CHECK: ar %r2, %r1
|
||||
; CHECK: srl %r2, 24
|
||||
; CHECK: br %r14
|
||||
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %a)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: llhr %r0, %r2
|
||||
; CHECK: popcnt %r0, %r0
|
||||
; CHECK: risblg %r2, %r0, 16, 151, 8
|
||||
; CHECK: ar %r2, %r0
|
||||
; CHECK: srl %r2, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i32 %a, 65535
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i32 @f3(i32 %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: llcr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0
|
||||
; CHECK: br %r14
|
||||
%and = and i32 %a, 255
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f4(i64 %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: popcnt %r0, %r2
|
||||
; CHECK: sllg %r1, %r0, 32
|
||||
; CHECK: agr %r1, %r0
|
||||
; CHECK: sllg %r0, %r1, 16
|
||||
; CHECK: agr %r0, %r1
|
||||
; CHECK: sllg %r1, %r0, 8
|
||||
; CHECK: agr %r1, %r0
|
||||
; CHECK: srlg %r2, %r1, 56
|
||||
; CHECK: br %r14
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %a)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f5(i64 %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: llgfr %r0, %r2
|
||||
; CHECK: popcnt %r0, %r0
|
||||
; CHECK: sllg %r1, %r0, 16
|
||||
; CHECK: algfr %r0, %r1
|
||||
; CHECK: sllg %r1, %r0, 8
|
||||
; CHECK: algfr %r0, %r1
|
||||
; CHECK: srlg %r2, %r0, 24
|
||||
%and = and i64 %a, 4294967295
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f6(i64 %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: llghr %r0, %r2
|
||||
; CHECK: popcnt %r0, %r0
|
||||
; CHECK: risbg %r1, %r0, 48, 183, 8
|
||||
; CHECK: agr %r1, %r0
|
||||
; CHECK: srlg %r2, %r1, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i64 %a, 65535
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f7(i64 %a) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: llgcr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0
|
||||
; CHECK: br %r14
|
||||
%and = and i64 %a, 255
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
@ -6334,6 +6334,18 @@
|
||||
# CHECK: pfd 15, 0
|
||||
0xe3 0xf0 0x00 0x00 0x00 0x36
|
||||
|
||||
# CHECK: popcnt %r0, %r0
|
||||
0xb9 0xe1 0x00 0x00
|
||||
|
||||
# CHECK: popcnt %r0, %r15
|
||||
0xb9 0xe1 0x00 0x0f
|
||||
|
||||
# CHECK: popcnt %r15, %r0
|
||||
0xb9 0xe1 0x00 0xf0
|
||||
|
||||
# CHECK: popcnt %r7, %r8
|
||||
0xb9 0xe1 0x00 0x78
|
||||
|
||||
# CHECK: risbg %r0, %r0, 0, 0, 0
|
||||
0xec 0x00 0x00 0x00 0x00 0x55
|
||||
|
||||
|
@ -2666,6 +2666,11 @@
|
||||
pfdrl 1, 1
|
||||
pfdrl 1, 0x100000000
|
||||
|
||||
#CHECK: error: {{(instruction requires: population-count)?}}
|
||||
#CHECK: popcnt %r0, %r0
|
||||
|
||||
popcnt %r0, %r0
|
||||
|
||||
#CHECK: error: invalid operand
|
||||
#CHECK: risbg %r0,%r0,0,0,-1
|
||||
#CHECK: error: invalid operand
|
||||
|
@ -1021,6 +1021,16 @@
|
||||
ork %r15,%r0,%r0
|
||||
ork %r7,%r8,%r9
|
||||
|
||||
#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
|
||||
#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
|
||||
#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0]
|
||||
#CHECK: popcnt %r7, %r8 # encoding: [0xb9,0xe1,0x00,0x78]
|
||||
|
||||
popcnt %r0,%r0
|
||||
popcnt %r0,%r15
|
||||
popcnt %r15,%r0
|
||||
popcnt %r7,%r8
|
||||
|
||||
#CHECK: risbhg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
|
||||
#CHECK: risbhg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
|
||||
#CHECK: risbhg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
|
||||
|
Loading…
x
Reference in New Issue
Block a user