[codegen,aarch64] Add a target hook to the code generator to control

vector type legalization strategies in a more fine grained manner, and
change the legalization of several v1iN types and v1f32 to be widening
rather than scalarization on AArch64.

This fixes an assertion failure caused by scalarizing nodes like "v1i32
trunc v1i64". As v1i64 is legal it will fail to scalarize v1i32.

This also provides a foundation for other targets to have more granular
control over how vector types are legalized.

Patch by Hao Liu, reviewed by Tim Northover. I'm committing it to allow
some work to start taking place on top of this patch as it adds some
really important hooks to the backend that I'd like to immediately start
using. =]

http://reviews.llvm.org/D4322

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212242 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2014-07-03 00:23:43 +00:00
parent 1ccffdf27f
commit 70968365db
10 changed files with 98 additions and 59 deletions

View File

@ -185,10 +185,15 @@ public:
/// Return true if the target has BitExtract instructions.
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
/// Return true if a vector of the given type should be split
/// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
/// legalization.
virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
/// Return the preferred vector type legalization action.
virtual TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const {
// The default action for one element vectors is to scalarize
if (VT.getVectorNumElements() == 1)
return TypeScalarizeVector;
// The default action for other vectors is to promote
return TypePromoteInteger;
}
// There are two general methods for expanding a BUILD_VECTOR node:
// 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle

View File

@ -1084,24 +1084,25 @@ void TargetLoweringBase::computeRegisterProperties() {
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
MVT VT = (MVT::SimpleValueType) i;
if (isTypeLegal(VT))
continue;
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
if (NElts != 1 && !shouldSplitVectorType(VT)) {
bool IsLegalWiderType = false;
// First try to promote the elements of integer vectors. If no legal
// promotion was found, fallback to the widen-vector method.
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType)nVT;
bool IsLegalWiderType = false;
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
case TypePromoteInteger: {
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
&& SVT.getVectorNumElements() == NElts &&
isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
&& SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
&& SVT.getScalarType().isInteger()) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@ -1110,15 +1111,15 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
if (IsLegalWiderType) continue;
if (IsLegalWiderType)
break;
}
case TypeWidenVector: {
// Try to widen the vector.
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts &&
isTypeLegal(SVT)) {
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT
&& SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@ -1127,27 +1128,34 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
if (IsLegalWiderType) continue;
if (IsLegalWiderType)
break;
}
case TypeSplitVector:
case TypeScalarizeVector: {
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
NumIntermediates, RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] =
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
MVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
unsigned NumElts = VT.getVectorNumElements();
ValueTypeActions.setTypeAction(VT,
NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
MVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
if (PreferredAction == TypeScalarizeVector)
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
}
break;
}
default:
llvm_unreachable("Unknown vector legalization action!");
}
}

View File

@ -7886,6 +7886,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
MVT SVT = VT.getSimpleVT();
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
|| SVT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();

View File

@ -324,6 +324,9 @@ public:
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
private:
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.

View File

@ -473,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
return VT.getScalarType() == MVT::i1;
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
SDValue

View File

@ -242,7 +242,8 @@ public:
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
bool shouldSplitVectorType(EVT VT) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here

View File

@ -270,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
return VT.getScalarType().bitsLE(MVT::i16);
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
return TypeSplitVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

View File

@ -50,7 +50,9 @@ public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
bool *IsFast) const override;
bool shouldSplitVectorType(EVT VT) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;

View File

@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
; CHECK: dup v0.8b, v0.b[0]
%b = extractelement <1 x i8> %a, i32 0
%c = insertelement <8 x i8> undef, i8 %b, i32 0
%d = insertelement <8 x i8> %c, i8 %b, i32 1
@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
; CHECK: dup v0.8h, v0.h[0]
%b = extractelement <1 x i16> %a, i32 0
%c = insertelement <8 x i16> undef, i16 %b, i32 0
%d = insertelement <8 x i16> %c, i16 %b, i32 1
@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
; CHECK: dup v0.4s, v0.s[0]
%b = extractelement <1 x i32> %a, i32 0
%c = insertelement <4 x i32> undef, i32 %b, i32 0
%d = insertelement <4 x i32> %c, i32 %b, i32 1
@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() {
define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v4i16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
; CHECK: dup v0.4h, v0.h[0]
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r
}
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-LABEL: concat_vector_v4i32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
; CHECK: dup v0.4s, v0.s[0]
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %r
}
define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v8i8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
; CHECK: dup v0.8b, v0.b[0]
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %r
}
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v8i16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
; CHECK: dup v0.8h, v0.h[0]
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %r
}
define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v16i8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
; CHECK: dup v0.16b, v0.b[0]
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %r
}

View File

@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, eq
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
ret <1 x float> %e