mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
DAGCombiner: Fold pointless truncate, bitcast, buildvector series
(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y))) can be folded into a (2xi32) (buildvector i32 a, i32 b). Such a DAG would cause unnecessary vdup instructions followed by vmovn instructions. We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in the vectorized version of the code below. double A[N]; double B[N]; void test_double_compare_to_double() { int i; for(i=0;i<N;i++) A[i] = (double)(A[i] < B[i]); } radar://13191881 Fixes bug 15283. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
64f3e763cd
commit
c46e2df74c
@ -5361,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// Fold a series of buildvector, bitcast, and truncate if possible.
|
||||
// For example fold
|
||||
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
|
||||
// (2xi32 (buildvector x, y)).
|
||||
if (Level == AfterLegalizeVectorOps && VT.isVector() &&
|
||||
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
|
||||
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N0.getOperand(0).hasOneUse()) {
|
||||
|
||||
SDValue BuildVect = N0.getOperand(0);
|
||||
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
|
||||
EVT TruncVecEltTy = VT.getVectorElementType();
|
||||
|
||||
// Check that the element types match.
|
||||
if (BuildVectEltTy == TruncVecEltTy) {
|
||||
// Now we only need to compute the offset of the truncated elements.
|
||||
unsigned BuildVecNumElts = BuildVect.getNumOperands();
|
||||
unsigned TruncVecNumElts = VT.getVectorNumElements();
|
||||
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
|
||||
|
||||
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
|
||||
"Invalid number of elements");
|
||||
|
||||
SmallVector<SDValue, 8> Opnds;
|
||||
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
|
||||
Opnds.push_back(BuildVect.getOperand(i));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
|
||||
Opnds.size());
|
||||
}
|
||||
}
|
||||
|
||||
// See if we can simplify the input to this truncate through knowledge that
|
||||
// only the low bits are being used.
|
||||
// For example "trunc (or (shl x, 8), y)" // -> trunc y
|
||||
|
15
test/CodeGen/ARM/neon_cmp.ll
Normal file
15
test/CodeGen/ARM/neon_cmp.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
|
||||
; bug 15283
|
||||
; radar://13191881
|
||||
; CHECK: vfcmp
|
||||
; Regression input: loads a <2 x double> from each of %a and %b, compares them
; lane-wise with an ordered less-than, widens the <2 x i1> result to <2 x i32>,
; converts that to <2 x double>, and stores the result back through %b.
; The negative FileCheck directives inside the body require that the generated
; code contains neither a vdup.32 nor a vmovn.i64 instruction.
define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
|
||||
%wide.load = load <2 x double>* %a, align 4
|
||||
%wide.load2 = load <2 x double>* %b, align 4
|
||||
; CHECK-NOT: vdup.32
|
||||
; CHECK-NOT: vmovn.i64
|
||||
%v1 = fcmp olt <2 x double> %wide.load, %wide.load2
|
||||
%v2 = zext <2 x i1> %v1 to <2 x i32>
|
||||
%v3 = sitofp <2 x i32> %v2 to <2 x double>
|
||||
store <2 x double> %v3, <2 x double>* %b, align 4
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user