[PowerPC] Implement the vpopcnt instructions for POWER8

Patch by Kit Barton.

Add the vector population count instructions for byte, halfword, word,
and doubleword sizes.  There are two major changes here:

    PPCISelLowering.cpp: Make CTPOP legal for vector types.
    PPCRegisterInfo.td: Added v2i64 to the VRRC register
      definition. This is needed for the doubleword variations of the
      integer ops that were added in P8. 

Test Plan

Test the instruction vpcnt* encoding/decoding in ppc64-encoding-vmx.s

Test the generation of the vpopcnt instructions for various vector
data types.  When adding the v2i64 type to the Vector Register set, I
also needed to add the appropriate bit conversion patterns between
v2i64 and the existing vector types.  Testing for these conversions
were also added in the test case by passing a different vector type as
a parameter into the test functions.  There is also a run step that
will ensure the vpopcnt instructions are generated when the vsx
feature is disabled.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228046 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2015-02-03 21:58:23 +00:00
parent 04e540582b
commit 8c775a4e7b
8 changed files with 74 additions and 6 deletions

View File

@ -109,9 +109,12 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureAltivec]>;
[FeatureVSX, FeatureP8Altivec]>;
def FeatureInvariantFunctionDescriptors :
SubtargetFeature<"invariant-function-descriptors",
@ -260,9 +263,9 @@ def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
*/
def ProcessorFeatures {
list<SubtargetFeature> Power8FeatureList =
[DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureP8Vector,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
[DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,

View File

@ -401,6 +401,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
// Vector popcnt instructions introduced in P8
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::CTPOP, VT, Legal);
else
setOperationAction(ISD::CTPOP, VT, Expand);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@ -455,7 +461,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
@ -593,6 +598,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
if (Subtarget.hasP8Altivec())
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
}
if (Subtarget.has64BitSupport())

View File

@ -791,18 +791,27 @@ def : Pat<(store v4i32:$rS, xoaddr:$dst),
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
// Shuffles.
@ -929,3 +938,19 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
let Predicates = [HasP8Altivec] in {
// Population Count
def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
"vpopcntb $vD, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (ctpop v16i8:$vB))]>;
def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
"vpopcnth $vD, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (ctpop v8i16:$vB))]>;
def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
"vpopcntw $vD, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (ctpop v4i32:$vB))]>;
def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
"vpopcntd $vD, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (ctpop v2i64:$vB))]>;
} // end HasP8Altivec

View File

@ -275,7 +275,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128,
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;

View File

@ -65,6 +65,7 @@ void PPCSubtarget::initializeEnvironment() {
HasQPX = false;
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;

View File

@ -89,6 +89,7 @@ protected:
bool HasQPX;
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@ -216,6 +217,7 @@ public:
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }

View File

@ -501,6 +501,18 @@
# CHECK: vrsqrtefp 2, 3
0x10 0x40 0x19 0x4a
# CHECK: vpopcntb 2, 3
0x10 0x40 0x1f 0x03
# CHECK: vpopcnth 2, 3
0x10 0x40 0x1f 0x43
# CHECK: vpopcntw 2, 3
0x10 0x40 0x1f 0x83
# CHECK: vpopcntd 2, 3
0x10 0x40 0x1f 0xc3
# CHECK: mtvscr 2
0x10 0x00 0x16 0x44

View File

@ -543,6 +543,23 @@
# CHECK-LE: vrsqrtefp 2, 3 # encoding: [0x4a,0x19,0x40,0x10]
vrsqrtefp 2, 3
# Vector population count instructions
# CHECK-BE: vpopcntb 2, 3 # encoding: [0x10,0x40,0x1f,0x03]
# CHECK-LE: vpopcntb 2, 3 # encoding: [0x03,0x1f,0x40,0x10]
vpopcntb 2, 3
# CHECK-BE: vpopcnth 2, 3 # encoding: [0x10,0x40,0x1f,0x43]
# CHECK-LE: vpopcnth 2, 3 # encoding: [0x43,0x1f,0x40,0x10]
vpopcnth 2, 3
# CHECK-BE: vpopcntw 2, 3 # encoding: [0x10,0x40,0x1f,0x83]
# CHECK-LE: vpopcntw 2, 3 # encoding: [0x83,0x1f,0x40,0x10]
vpopcntw 2, 3
# BCHECK-BE: vpopcntd 2, 3 # encoding: [0x10,0x40,0x1f,0xC3]
# BCHECK-LE: vpopcntd 2, 3 # encoding: [0xC3,0x1f,0x40,0x10]
# vpopcntd 2, 3
# Vector status and control register instructions
# CHECK-BE: mtvscr 2 # encoding: [0x10,0x00,0x16,0x44]