mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	DAGCombiner: Fold pointless truncate, bitcast, buildvector series
(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y)))
can be folded into a (2xi32) (buildvector i32 a, i32 b).
Such a DAG would cause unnecessary vdup instructions followed by vmovn
instructions.
We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64, for example in
the vectorized version of the code below:
double A[N];
double B[N];
void test_double_compare_to_double() {
  int i;
  for(i=0;i<N;i++)
    A[i] = (double)(A[i] < B[i]);
}
radar://13191881
Fixes bug 15283.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175670 91177308-0d34-0410-b5e6-96231b3b80d8
			
			
This commit is contained in:
		| @@ -5361,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Fold a series of buildvector, bitcast, and truncate if possible. | ||||
|   // For example fold | ||||
|   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to | ||||
|   //   (2xi32 (buildvector x, y)). | ||||
|   if (Level == AfterLegalizeVectorOps && VT.isVector() && | ||||
|       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && | ||||
|       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && | ||||
|       N0.getOperand(0).hasOneUse()) { | ||||
|  | ||||
|     SDValue BuildVect = N0.getOperand(0); | ||||
|     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); | ||||
|     EVT TruncVecEltTy = VT.getVectorElementType(); | ||||
|  | ||||
|     // Check that the element types match. | ||||
|     if (BuildVectEltTy == TruncVecEltTy) { | ||||
|       // Now we only need to compute the offset of the truncated elements. | ||||
|       unsigned BuildVecNumElts =  BuildVect.getNumOperands(); | ||||
|       unsigned TruncVecNumElts = VT.getVectorNumElements(); | ||||
|       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; | ||||
|  | ||||
|       assert((BuildVecNumElts % TruncVecNumElts) == 0 && | ||||
|              "Invalid number of elements"); | ||||
|  | ||||
|       SmallVector<SDValue, 8> Opnds; | ||||
|       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) | ||||
|         Opnds.push_back(BuildVect.getOperand(i)); | ||||
|  | ||||
|       return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], | ||||
|                          Opnds.size()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // See if we can simplify the input to this truncate through knowledge that | ||||
|   // only the low bits are being used. | ||||
|   // For example "trunc (or (shl x, 8), y)" // -> trunc y | ||||
|   | ||||
							
								
								
									
										15
									
								
								test/CodeGen/ARM/neon_cmp.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								test/CodeGen/ARM/neon_cmp.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s | ||||
| ; bug 15283 | ||||
| ; radar://13191881 | ||||
| ; CHECK: vfcmp | ||||
| define void @vfcmp(<2 x double>* %a, <2 x double>* %b) { | ||||
|   %wide.load = load <2 x double>* %a, align 4 | ||||
|   %wide.load2 = load <2 x double>* %b, align 4 | ||||
| ; CHECK-NOT: vdup.32 | ||||
| ; CHECK-NOT: vmovn.i64 | ||||
|   %v1 = fcmp olt <2 x double> %wide.load, %wide.load2 | ||||
|   %v2 = zext <2 x i1> %v1 to <2 x i32> | ||||
|   %v3 = sitofp <2 x i32> %v2 to <2 x double> | ||||
|   store <2 x double> %v3, <2 x double>* %b, align 4 | ||||
|   ret void | ||||
| } | ||||
		Reference in New Issue
	
	Block a user