mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-17 15:38:40 +00:00
Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This
affects two places in the code: handling cross block values and handling
function return and arguments. Since vectors are already widened by
legalizetypes, this gives us much better code and unblocks x86-64 abi and
SPU abi work.

For example, this (which is a silly example of a cross-block value):

define <4 x float> @test2(<4 x float> %A) nounwind {
  %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %C = fadd <2 x float> %B, %B
  br label %BB
BB:
  %D = fadd <2 x float> %C, %C
  %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %E
}

Now compiles into:

_test2:                                 ## @test2
## BB#0:
        addps   %xmm0, %xmm0
        addps   %xmm0, %xmm0
        ret

previously it compiled into:

_test2:                                 ## @test2
## BB#0:
        addps   %xmm0, %xmm0
        pshufd  $1, %xmm0, %xmm1
                                        ## kill: XMM0<def> XMM0<kill> XMM0<def>
        insertps        $0, %xmm0, %xmm0
        insertps        $16, %xmm1, %xmm0
        addps   %xmm0, %xmm0
        ret

This implements rdar://8230384

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e6e0018d3e
commit
e6f7c267df
@ -214,24 +214,59 @@ public:
|
|||||||
/// ValueTypeActions - For each value type, keep a LegalizeAction enum
|
/// ValueTypeActions - For each value type, keep a LegalizeAction enum
|
||||||
/// that indicates how instruction selection should deal with the type.
|
/// that indicates how instruction selection should deal with the type.
|
||||||
uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
|
uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
|
||||||
|
|
||||||
|
LegalizeAction getExtendedTypeAction(EVT VT) const {
|
||||||
|
// Handle non-vector integers.
|
||||||
|
if (!VT.isVector()) {
|
||||||
|
assert(VT.isInteger() && "Unsupported extended type!");
|
||||||
|
unsigned BitSize = VT.getSizeInBits();
|
||||||
|
// First promote to a power-of-two size, then expand if necessary.
|
||||||
|
if (BitSize < 8 || !isPowerOf2_32(BitSize))
|
||||||
|
return Promote;
|
||||||
|
return Expand;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a type smaller than a legal vector type, promote to that
|
||||||
|
// type, e.g. <2 x float> -> <4 x float>.
|
||||||
|
if (VT.getVectorElementType().isSimple() &&
|
||||||
|
VT.getVectorNumElements() != 1) {
|
||||||
|
MVT EltType = VT.getVectorElementType().getSimpleVT();
|
||||||
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
|
while (1) {
|
||||||
|
// Round up to the nearest power of 2.
|
||||||
|
NumElts = (unsigned)NextPowerOf2(NumElts);
|
||||||
|
|
||||||
|
MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
|
||||||
|
if (LargerVector == MVT()) break;
|
||||||
|
|
||||||
|
// If the larger type is legal, promote to it.
|
||||||
|
if (getTypeAction(LargerVector) == Legal) return Promote;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return VT.isPow2VectorType() ? Expand : Promote;
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
ValueTypeActionImpl() {
|
ValueTypeActionImpl() {
|
||||||
std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
|
std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// FIXME: This Context argument is now dead, zap it.
|
||||||
LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
|
LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
|
||||||
if (VT.isExtended()) {
|
return getTypeAction(VT);
|
||||||
if (VT.isVector()) {
|
|
||||||
return VT.isPow2VectorType() ? Expand : Promote;
|
|
||||||
}
|
|
||||||
if (VT.isInteger())
|
|
||||||
// First promote to a power-of-two size, then expand if necessary.
|
|
||||||
return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
|
|
||||||
assert(0 && "Unsupported extended type!");
|
|
||||||
return Legal;
|
|
||||||
}
|
|
||||||
unsigned I = VT.getSimpleVT().SimpleTy;
|
|
||||||
return (LegalizeAction)ValueTypeActions[I];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LegalizeAction getTypeAction(EVT VT) const {
|
||||||
|
if (!VT.isExtended())
|
||||||
|
return getTypeAction(VT.getSimpleVT());
|
||||||
|
return getExtendedTypeAction(VT);
|
||||||
|
}
|
||||||
|
|
||||||
|
LegalizeAction getTypeAction(MVT VT) const {
|
||||||
|
return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void setTypeAction(EVT VT, LegalizeAction Action) {
|
void setTypeAction(EVT VT, LegalizeAction Action) {
|
||||||
unsigned I = VT.getSimpleVT().SimpleTy;
|
unsigned I = VT.getSimpleVT().SimpleTy;
|
||||||
ValueTypeActions[I] = Action;
|
ValueTypeActions[I] = Action;
|
||||||
|
@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||||||
if (PartVT == ValueVT)
|
if (PartVT == ValueVT)
|
||||||
return Val;
|
return Val;
|
||||||
|
|
||||||
if (PartVT.isVector()) // Vector/Vector bitcast.
|
if (PartVT.isVector()) {
|
||||||
|
// If the element types of the source/dest vectors are the same, but the
|
||||||
|
// parts vector has more elements than the value vector, then we have a
|
||||||
|
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
|
||||||
|
// elements we want.
|
||||||
|
if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
|
||||||
|
assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
|
||||||
|
"Cannot narrow, it would be a lossy transformation");
|
||||||
|
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
|
||||||
|
DAG.getIntPtrConstant(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vector/Vector bitcast.
|
||||||
return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
|
return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
|
||||||
|
}
|
||||||
|
|
||||||
assert(ValueVT.getVectorElementType() == PartVT &&
|
assert(ValueVT.getVectorElementType() == PartVT &&
|
||||||
ValueVT.getVectorNumElements() == 1 &&
|
ValueVT.getVectorNumElements() == 1 &&
|
||||||
@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
|
||||||
if (NumParts == 1) {
|
if (NumParts == 1) {
|
||||||
if (PartVT != ValueVT) {
|
if (PartVT == ValueVT) {
|
||||||
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
|
// Nothing to do.
|
||||||
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
|
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
|
||||||
} else {
|
// Bitconvert vector->vector case.
|
||||||
assert(ValueVT.getVectorElementType() == PartVT &&
|
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
|
||||||
ValueVT.getVectorNumElements() == 1 &&
|
} else if (PartVT.isVector() &&
|
||||||
"Only trivial vector-to-scalar conversions should get here!");
|
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
|
||||||
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
|
||||||
PartVT, Val, DAG.getIntPtrConstant(0));
|
EVT ElementVT = PartVT.getVectorElementType();
|
||||||
}
|
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
|
||||||
|
// undef elements.
|
||||||
|
SmallVector<SDValue, 16> Ops;
|
||||||
|
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
|
||||||
|
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||||
|
ElementVT, Val, DAG.getIntPtrConstant(i)));
|
||||||
|
|
||||||
|
for (unsigned i = ValueVT.getVectorNumElements(),
|
||||||
|
e = PartVT.getVectorNumElements(); i != e; ++i)
|
||||||
|
Ops.push_back(DAG.getUNDEF(ElementVT));
|
||||||
|
|
||||||
|
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
|
||||||
|
|
||||||
|
// FIXME: Use CONCAT for 2x -> 4x.
|
||||||
|
|
||||||
|
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
|
||||||
|
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
|
||||||
|
} else {
|
||||||
|
// Vector -> scalar conversion.
|
||||||
|
assert(ValueVT.getVectorElementType() == PartVT &&
|
||||||
|
ValueVT.getVectorNumElements() == 1 &&
|
||||||
|
"Only trivial vector-to-scalar conversions should get here!");
|
||||||
|
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||||
|
PartVT, Val, DAG.getIntPtrConstant(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
Parts[0] = Val;
|
Parts[0] = Val;
|
||||||
@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||||||
DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
|
DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
|
||||||
else
|
else
|
||||||
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||||
IntermediateVT, Val,
|
IntermediateVT, Val, DAG.getIntPtrConstant(i));
|
||||||
DAG.getIntPtrConstant(i));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split the intermediate operands into legal parts.
|
// Split the intermediate operands into legal parts.
|
||||||
|
@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const {
|
|||||||
return std::make_pair(BestRC, 1);
|
return std::make_pair(BestRC, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// computeRegisterProperties - Once all of the register classes are added,
|
/// computeRegisterProperties - Once all of the register classes are added,
|
||||||
/// this allows us to compute derived properties we expose.
|
/// this allows us to compute derived properties we expose.
|
||||||
void TargetLowering::computeRegisterProperties() {
|
void TargetLowering::computeRegisterProperties() {
|
||||||
@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
|
|||||||
MVT VT = (MVT::SimpleValueType)i;
|
MVT VT = (MVT::SimpleValueType)i;
|
||||||
if (isTypeLegal(VT)) continue;
|
if (isTypeLegal(VT)) continue;
|
||||||
|
|
||||||
|
// Determine if there is a legal wider type. If so, we should promote to
|
||||||
|
// that wider vector type.
|
||||||
|
EVT EltVT = VT.getVectorElementType();
|
||||||
|
unsigned NElts = VT.getVectorNumElements();
|
||||||
|
if (NElts != 1) {
|
||||||
|
bool IsLegalWiderType = false;
|
||||||
|
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
||||||
|
EVT SVT = (MVT::SimpleValueType)nVT;
|
||||||
|
if (SVT.getVectorElementType() == EltVT &&
|
||||||
|
SVT.getVectorNumElements() > NElts &&
|
||||||
|
isTypeSynthesizable(SVT)) {
|
||||||
|
TransformToType[i] = SVT;
|
||||||
|
RegisterTypeForVT[i] = SVT;
|
||||||
|
NumRegistersForVT[i] = 1;
|
||||||
|
ValueTypeActions.setTypeAction(VT, Promote);
|
||||||
|
IsLegalWiderType = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (IsLegalWiderType) continue;
|
||||||
|
}
|
||||||
|
|
||||||
MVT IntermediateVT;
|
MVT IntermediateVT;
|
||||||
EVT RegisterVT;
|
EVT RegisterVT;
|
||||||
unsigned NumIntermediates;
|
unsigned NumIntermediates;
|
||||||
@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() {
|
|||||||
RegisterVT, this);
|
RegisterVT, this);
|
||||||
RegisterTypeForVT[i] = RegisterVT;
|
RegisterTypeForVT[i] = RegisterVT;
|
||||||
|
|
||||||
// Determine if there is a legal wider type.
|
EVT NVT = VT.getPow2VectorType();
|
||||||
bool IsLegalWiderType = false;
|
if (NVT == VT) {
|
||||||
EVT EltVT = VT.getVectorElementType();
|
// Type is already a power of 2. The default action is to split.
|
||||||
unsigned NElts = VT.getVectorNumElements();
|
TransformToType[i] = MVT::Other;
|
||||||
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
ValueTypeActions.setTypeAction(VT, Expand);
|
||||||
EVT SVT = (MVT::SimpleValueType)nVT;
|
} else {
|
||||||
if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
|
TransformToType[i] = NVT;
|
||||||
SVT.getVectorNumElements() > NElts && NElts != 1) {
|
ValueTypeActions.setTypeAction(VT, Promote);
|
||||||
TransformToType[i] = SVT;
|
|
||||||
ValueTypeActions.setTypeAction(VT, Promote);
|
|
||||||
IsLegalWiderType = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!IsLegalWiderType) {
|
|
||||||
EVT NVT = VT.getPow2VectorType();
|
|
||||||
if (NVT == VT) {
|
|
||||||
// Type is already a power of 2. The default action is to split.
|
|
||||||
TransformToType[i] = MVT::Other;
|
|
||||||
ValueTypeActions.setTypeAction(VT, Expand);
|
|
||||||
} else {
|
|
||||||
TransformToType[i] = NVT;
|
|
||||||
ValueTypeActions.setTypeAction(VT, Promote);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
|
|||||||
EVT &IntermediateVT,
|
EVT &IntermediateVT,
|
||||||
unsigned &NumIntermediates,
|
unsigned &NumIntermediates,
|
||||||
EVT &RegisterVT) const {
|
EVT &RegisterVT) const {
|
||||||
// Figure out the right, legal destination reg to copy into.
|
|
||||||
unsigned NumElts = VT.getVectorNumElements();
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
|
|
||||||
|
// If there is a wider vector type with the same element type as this one,
|
||||||
|
// we should widen to that legal vector type. This handles things like
|
||||||
|
// <2 x float> -> <4 x float>.
|
||||||
|
if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
|
||||||
|
RegisterVT = getTypeToTransformTo(Context, VT);
|
||||||
|
if (isTypeLegal(RegisterVT)) {
|
||||||
|
IntermediateVT = RegisterVT;
|
||||||
|
NumIntermediates = 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Figure out the right, legal destination reg to copy into.
|
||||||
EVT EltTy = VT.getVectorElementType();
|
EVT EltTy = VT.getVectorElementType();
|
||||||
|
|
||||||
unsigned NumVectorRegs = 1;
|
unsigned NumVectorRegs = 1;
|
||||||
@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
|
|||||||
|
|
||||||
EVT DestVT = getRegisterType(Context, NewVT);
|
EVT DestVT = getRegisterType(Context, NewVT);
|
||||||
RegisterVT = DestVT;
|
RegisterVT = DestVT;
|
||||||
if (DestVT.bitsLT(NewVT)) {
|
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
|
||||||
// Value is expanded, e.g. i64 -> i16.
|
|
||||||
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
|
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
|
||||||
} else {
|
|
||||||
// Otherwise, promotion or legal types use the same number of registers as
|
|
||||||
// the vector decimated to the appropriate level.
|
|
||||||
return NumVectorRegs;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
// Otherwise, promotion or legal types use the same number of registers as
|
||||||
|
// the vector decimated to the appropriate level.
|
||||||
|
return NumVectorRegs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the EVTs and ArgFlags collections that represent the legalized return
|
/// Get the EVTs and ArgFlags collections that represent the legalized return
|
||||||
|
@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
|||||||
store float %c, float* %P2
|
store float %c, float* %P2
|
||||||
ret void
|
ret void
|
||||||
; X64: test1:
|
; X64: test1:
|
||||||
; X64-NEXT: addss %xmm1, %xmm0
|
; X64-NEXT: pshufd $1, %xmm0, %xmm1
|
||||||
; X64-NEXT: movss %xmm0, (%rdi)
|
; X64-NEXT: addss %xmm0, %xmm1
|
||||||
|
; X64-NEXT: movss %xmm1, (%rdi)
|
||||||
; X64-NEXT: ret
|
; X64-NEXT: ret
|
||||||
|
|
||||||
; X32: test1:
|
; X32: test1:
|
||||||
; X32-NEXT: movss 4(%esp), %xmm0
|
; X32-NEXT: pshufd $1, %xmm0, %xmm1
|
||||||
; X32-NEXT: addss 8(%esp), %xmm0
|
; X32-NEXT: addss %xmm0, %xmm1
|
||||||
; X32-NEXT: movl 12(%esp), %eax
|
; X32-NEXT: movl 4(%esp), %eax
|
||||||
; X32-NEXT: movss %xmm0, (%eax)
|
; X32-NEXT: movss %xmm1, (%eax)
|
||||||
; X32-NEXT: ret
|
; X32-NEXT: ret
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
|
|||||||
ret <2 x float> %Z
|
ret <2 x float> %Z
|
||||||
|
|
||||||
; X64: test2:
|
; X64: test2:
|
||||||
; X64-NEXT: insertps $0
|
; X64-NEXT: addps %xmm1, %xmm0
|
||||||
; X64-NEXT: insertps $16
|
|
||||||
; X64-NEXT: insertps $0
|
|
||||||
; X64-NEXT: insertps $16
|
|
||||||
; X64-NEXT: addps
|
|
||||||
; X64-NEXT: movaps
|
|
||||||
; X64-NEXT: pshufd
|
|
||||||
; X64-NEXT: ret
|
; X64-NEXT: ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define <2 x float> @test3(<4 x float> %A) nounwind {
|
||||||
|
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||||
|
%C = fadd <2 x float> %B, %B
|
||||||
|
ret <2 x float> %C
|
||||||
|
; CHECK: test3:
|
||||||
|
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define <2 x float> @test4(<2 x float> %A) nounwind {
|
||||||
|
%C = fadd <2 x float> %A, %A
|
||||||
|
ret <2 x float> %C
|
||||||
|
; CHECK: test4:
|
||||||
|
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @test5(<4 x float> %A) nounwind {
|
||||||
|
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||||
|
%C = fadd <2 x float> %B, %B
|
||||||
|
br label %BB
|
||||||
|
|
||||||
|
BB:
|
||||||
|
%D = fadd <2 x float> %C, %C
|
||||||
|
%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||||
|
ret <4 x float> %E
|
||||||
|
|
||||||
|
; CHECK: _test5:
|
||||||
|
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,7 +3,8 @@
|
|||||||
; widening shuffle v3float and then a add
|
; widening shuffle v3float and then a add
|
||||||
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: insertps
|
; CHECK: shuf:
|
||||||
|
; CHECK: extractps
|
||||||
; CHECK: extractps
|
; CHECK: extractps
|
||||||
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
|
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
|
||||||
%val = fadd <3 x float> %x, %src2
|
%val = fadd <3 x float> %x, %src2
|
||||||
@ -15,7 +16,8 @@ entry:
|
|||||||
; widening shuffle v3float with a different mask and then a add
|
; widening shuffle v3float with a different mask and then a add
|
||||||
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: insertps
|
; CHECK: shuf2:
|
||||||
|
; CHECK: extractps
|
||||||
; CHECK: extractps
|
; CHECK: extractps
|
||||||
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
|
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
|
||||||
%val = fadd <3 x float> %x, %src2
|
%val = fadd <3 x float> %x, %src2
|
||||||
@ -26,7 +28,7 @@ entry:
|
|||||||
; Example of when widening a v3float operation causes the DAG to replace a node
|
; Example of when widening a v3float operation causes the DAG to replace a node
|
||||||
; with the operation that we are currently widening, i.e. when replacing
|
; with the operation that we are currently widening, i.e. when replacing
|
||||||
; opA with opB, the DAG will produce new operations with opA.
|
; opA with opB, the DAG will produce new operations with opA.
|
||||||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
|
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: pshufd
|
; CHECK: pshufd
|
||||||
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user