mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Loop Vectorize: optimize the vectorization of trunc(induction_var). The truncation is now done on scalars.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169904 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, | ||||
|     case Instruction::Trunc: | ||||
|     case Instruction::FPTrunc: | ||||
|     case Instruction::BitCast: { | ||||
|       /// Vectorize bitcasts. | ||||
|       CastInst *CI = dyn_cast<CastInst>(it); | ||||
|       /// Optimize the special case where the source is the induction | ||||
|       /// variable. Note that we can only optimize the 'trunc' case | ||||
|       /// because: a. FP conversions lose precision, b. sext/zext may wrap, | ||||
|       /// c. other casts depend on pointer size. | ||||
|       if (CI->getOperand(0) == OldInduction && | ||||
|           it->getOpcode() == Instruction::Trunc) { | ||||
|         Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, | ||||
|                                                CI->getType()); | ||||
|         Value *Broadcasted = getBroadcastInstrs(ScalarCast); | ||||
|         WidenMap[it] = getConsecutiveVector(Broadcasted); | ||||
|         break; | ||||
|       } | ||||
|       /// Vectorize casts. | ||||
|       Value *A = getVectorValue(it->getOperand(0)); | ||||
|       Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); | ||||
|       WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); | ||||
|   | ||||
							
								
								
									
										30
									
								
								test/Transforms/LoopVectorize/cast-induction.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								test/Transforms/LoopVectorize/cast-induction.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| ; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s | ||||
|  | ||||
| ; rdar://problem/12848162 | ||||
|  | ||||
| target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" | ||||
| target triple = "x86_64-apple-macosx10.8.0" | ||||
|  | ||||
| @a = common global [2048 x i32] zeroinitializer, align 16 | ||||
|  | ||||
| ;CHECK: @example12 | ||||
| ;CHECK: trunc i64 | ||||
| ;CHECK: store <4 x i32> | ||||
| ;CHECK: ret void | ||||
| define void @example12() nounwind uwtable ssp { | ||||
|   br label %1 | ||||
|  | ||||
| ; <label>:1                                       ; preds = %1, %0 | ||||
|   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] | ||||
|   %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv | ||||
|   %3 = trunc i64 %indvars.iv to i32 | ||||
|   store i32 %3, i32* %2, align 4 | ||||
|   %indvars.iv.next = add i64 %indvars.iv, 1 | ||||
|   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 | ||||
|   %exitcond = icmp eq i32 %lftr.wideiv, 1024 | ||||
|   br i1 %exitcond, label %4, label %1 | ||||
|  | ||||
| ; <label>:4                                       ; preds = %1 | ||||
|   ret void | ||||
| } | ||||
|  | ||||
| @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 | ||||
| target triple = "x86_64-apple-macosx10.8.0" | ||||
|  | ||||
| ;CHECK: @cpp_new_arrays | ||||
| ;CHECK: insertelement <4 x i32> | ||||
| ;CHECK: sext i32 | ||||
| ;CHECK: load <4 x float> | ||||
| ;CHECK: fadd <4 x float> | ||||
| ;CHECK: ret i32 | ||||
|   | ||||
| @@ -329,7 +329,7 @@ define void @example11() nounwind uwtable ssp { | ||||
| } | ||||
|  | ||||
| ;CHECK: @example12 | ||||
| ;CHECK: trunc <4 x i64> | ||||
| ;CHECK: trunc i64 | ||||
| ;CHECK: store <4 x i32> | ||||
| ;CHECK: ret void | ||||
| define void @example12() nounwind uwtable ssp { | ||||
|   | ||||
| @@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0" | ||||
| @array = common global [1024 x i32] zeroinitializer, align 16 | ||||
|  | ||||
| ;CHECK: @array_at_plus_one | ||||
| ;CHECK: add <4 x i64> | ||||
| ;CHECK: trunc <4 x i64> | ||||
| ;CHECK: trunc i64 | ||||
| ;CHECK: add i64 %index, 12 | ||||
| ;CHECK: ret i32 | ||||
| define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user