mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Handle equality memcmp of 8 bytes on x86-64 with two unaligned loads and a
compare. On other targets we end up with a call to memcmp because we don't want 16 individual byte loads. We should be able to use movups as well, but we're failing to select the generated icmp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92107 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -5092,17 +5092,8 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size, | ||||
| static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, | ||||
|                              SelectionDAGBuilder &Builder) { | ||||
|   MVT LoadVT; | ||||
|   const Type *LoadTy; | ||||
|   if (Size == 2) { | ||||
|     LoadVT = MVT::i16; | ||||
|     LoadTy = Type::getInt16Ty(PtrVal->getContext()); | ||||
|   } else { | ||||
|     LoadVT = MVT::i32; | ||||
|     LoadTy = Type::getInt32Ty(PtrVal->getContext());  | ||||
|   } | ||||
|    | ||||
|   // Check to see if this load can be trivially constant folded, e.g. if the | ||||
|   // input is from a string literal. | ||||
| @@ -5158,16 +5149,61 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { | ||||
|    | ||||
|   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0 | ||||
|   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0 | ||||
|   if (Size && (Size->getValue() == 2 || Size->getValue() == 4) && | ||||
|       IsOnlyUsedInZeroEqualityComparison(&I)) { | ||||
|     SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this); | ||||
|     SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this); | ||||
|   if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { | ||||
|     bool ActuallyDoIt = true; | ||||
|     MVT LoadVT; | ||||
|     const Type *LoadTy; | ||||
|     switch (Size->getZExtValue()) { | ||||
|     default: | ||||
|       LoadVT = MVT::Other; | ||||
|       LoadTy = 0; | ||||
|       ActuallyDoIt = false; | ||||
|       break; | ||||
|     case 2: | ||||
|       LoadVT = MVT::i16; | ||||
|       LoadTy = Type::getInt16Ty(Size->getContext()); | ||||
|       break; | ||||
|     case 4: | ||||
|       LoadVT = MVT::i32; | ||||
|       LoadTy = Type::getInt32Ty(Size->getContext());  | ||||
|       break; | ||||
|     case 8: | ||||
|       LoadVT = MVT::i64; | ||||
|       LoadTy = Type::getInt64Ty(Size->getContext());  | ||||
|       break; | ||||
|         /* | ||||
|     case 16: | ||||
|       LoadVT = MVT::v4i32; | ||||
|       LoadTy = Type::getInt32Ty(Size->getContext());  | ||||
|       LoadTy = VectorType::get(LoadTy, 4); | ||||
|       break; | ||||
|          */ | ||||
|     } | ||||
|      | ||||
|     SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, | ||||
|                                ISD::SETNE); | ||||
|     EVT CallVT = TLI.getValueType(I.getType(), true); | ||||
|     setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); | ||||
|     return true; | ||||
|     // This turns into unaligned loads.  We only do this if the target natively | ||||
|     // supports the MVT we'll be loading or if it is small enough (<= 4) that | ||||
|     // we'll only produce a small number of byte loads. | ||||
|      | ||||
|     // Require that we can find a legal MVT, and only do this if the target | ||||
|     // supports unaligned loads of that type.  Expanding into byte loads would | ||||
|     // bloat the code. | ||||
|     if (ActuallyDoIt && Size->getZExtValue() > 4) { | ||||
|       // TODO: Handle 5 byte compare as 4-byte + 1 byte. | ||||
|       // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. | ||||
|       if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) | ||||
|         ActuallyDoIt = false; | ||||
|     } | ||||
|      | ||||
|     if (ActuallyDoIt) { | ||||
|       SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); | ||||
|       SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); | ||||
|        | ||||
|       SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, | ||||
|                                  ISD::SETNE); | ||||
|       EVT CallVT = TLI.getValueType(I.getType(), true); | ||||
|       setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); | ||||
|       return true; | ||||
|     } | ||||
|   } | ||||
|    | ||||
|    | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| ; This tests codegen time inlining/optimization of memcmp | ||||
| ; rdar://6480398 | ||||
|  | ||||
| @.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1] | ||||
| @.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1] | ||||
|  | ||||
| declare i32 @memcmp(...) | ||||
|  | ||||
| @@ -26,7 +26,7 @@ return:                                           ; preds = %entry | ||||
|  | ||||
| define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { | ||||
| entry: | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1] | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1] | ||||
|   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] | ||||
|   br i1 %1, label %return, label %bb | ||||
|  | ||||
| @@ -60,7 +60,7 @@ return:                                           ; preds = %entry | ||||
|  | ||||
| define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { | ||||
| entry: | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1] | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1] | ||||
|   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] | ||||
|   br i1 %1, label %return, label %bb | ||||
|  | ||||
| @@ -71,6 +71,40 @@ bb:                                               ; preds = %entry | ||||
| return:                                           ; preds = %entry | ||||
|   ret void | ||||
| ; CHECK: memcmp4a: | ||||
| ; CHECK: cmpl $2021158767, (%rdi) | ||||
| ; CHECK: cmpl $1869573999, (%rdi) | ||||
| } | ||||
|  | ||||
| define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { | ||||
| entry: | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1] | ||||
|   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] | ||||
|   br i1 %1, label %return, label %bb | ||||
|  | ||||
| bb:                                               ; preds = %entry | ||||
|   store i32 4, i32* %P, align 4 | ||||
|   ret void | ||||
|  | ||||
| return:                                           ; preds = %entry | ||||
|   ret void | ||||
| ; CHECK: memcmp8: | ||||
| ; CHECK: movq    (%rsi), %rax | ||||
| ; CHECK: cmpq    %rax, (%rdi) | ||||
| } | ||||
|  | ||||
| define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind { | ||||
| entry: | ||||
|   %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1] | ||||
|   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] | ||||
|   br i1 %1, label %return, label %bb | ||||
|  | ||||
| bb:                                               ; preds = %entry | ||||
|   store i32 4, i32* %P, align 4 | ||||
|   ret void | ||||
|  | ||||
| return:                                           ; preds = %entry | ||||
|   ret void | ||||
| ; CHECK: memcmp8a: | ||||
| ; CHECK: movabsq $8029759185026510694, %rax | ||||
| ; CHECK: cmpq	%rax, (%rdi) | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user