mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-14 14:24:05 +00:00
Handle equality memcmp of 8 bytes on x86-64 with two unaligned loads and a compare. On other targets we end up with a call to memcmp because we don't want 16 individual byte loads. We should be able to use movups as well, but we're failing to select the generated icmp.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92107 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -5092,17 +5092,8 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size,
|
static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
|
||||||
SelectionDAGBuilder &Builder) {
|
SelectionDAGBuilder &Builder) {
|
||||||
MVT LoadVT;
|
|
||||||
const Type *LoadTy;
|
|
||||||
if (Size == 2) {
|
|
||||||
LoadVT = MVT::i16;
|
|
||||||
LoadTy = Type::getInt16Ty(PtrVal->getContext());
|
|
||||||
} else {
|
|
||||||
LoadVT = MVT::i32;
|
|
||||||
LoadTy = Type::getInt32Ty(PtrVal->getContext());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check to see if this load can be trivially constant folded, e.g. if the
|
// Check to see if this load can be trivially constant folded, e.g. if the
|
||||||
// input is from a string literal.
|
// input is from a string literal.
|
||||||
@ -5158,10 +5149,54 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
|
|||||||
|
|
||||||
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
|
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
|
||||||
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
|
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
|
||||||
if (Size && (Size->getValue() == 2 || Size->getValue() == 4) &&
|
if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
|
||||||
IsOnlyUsedInZeroEqualityComparison(&I)) {
|
bool ActuallyDoIt = true;
|
||||||
SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this);
|
MVT LoadVT;
|
||||||
SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this);
|
const Type *LoadTy;
|
||||||
|
switch (Size->getZExtValue()) {
|
||||||
|
default:
|
||||||
|
LoadVT = MVT::Other;
|
||||||
|
LoadTy = 0;
|
||||||
|
ActuallyDoIt = false;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
LoadVT = MVT::i16;
|
||||||
|
LoadTy = Type::getInt16Ty(Size->getContext());
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
LoadVT = MVT::i32;
|
||||||
|
LoadTy = Type::getInt32Ty(Size->getContext());
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
LoadVT = MVT::i64;
|
||||||
|
LoadTy = Type::getInt64Ty(Size->getContext());
|
||||||
|
break;
|
||||||
|
/*
|
||||||
|
case 16:
|
||||||
|
LoadVT = MVT::v4i32;
|
||||||
|
LoadTy = Type::getInt32Ty(Size->getContext());
|
||||||
|
LoadTy = VectorType::get(LoadTy, 4);
|
||||||
|
break;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
// This turns into unaligned loads. We only do this if the target natively
|
||||||
|
// supports the MVT we'll be loading or if it is small enough (<= 4) that
|
||||||
|
// we'll only produce a small number of byte loads.
|
||||||
|
|
||||||
|
// Require that we can find a legal MVT, and only do this if the target
|
||||||
|
// supports unaligned loads of that type. Expanding into byte loads would
|
||||||
|
// bloat the code.
|
||||||
|
if (ActuallyDoIt && Size->getZExtValue() > 4) {
|
||||||
|
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
|
||||||
|
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
|
||||||
|
if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
|
||||||
|
ActuallyDoIt = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ActuallyDoIt) {
|
||||||
|
SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
|
||||||
|
SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
|
||||||
|
|
||||||
SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
|
SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
|
||||||
ISD::SETNE);
|
ISD::SETNE);
|
||||||
@ -5169,6 +5204,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
|
|||||||
setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
|
setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
; This tests codegen time inlining/optimization of memcmp
|
; This tests codegen time inlining/optimization of memcmp
|
||||||
; rdar://6480398
|
; rdar://6480398
|
||||||
|
|
||||||
@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
|
@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1]
|
||||||
|
|
||||||
declare i32 @memcmp(...)
|
declare i32 @memcmp(...)
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ return: ; preds = %entry
|
|||||||
|
|
||||||
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
|
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
|
||||||
entry:
|
entry:
|
||||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
|
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
|
||||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||||
br i1 %1, label %return, label %bb
|
br i1 %1, label %return, label %bb
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ return: ; preds = %entry
|
|||||||
|
|
||||||
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
|
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
|
||||||
entry:
|
entry:
|
||||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
|
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
|
||||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||||
br i1 %1, label %return, label %bb
|
br i1 %1, label %return, label %bb
|
||||||
|
|
||||||
@ -71,6 +71,40 @@ bb: ; preds = %entry
|
|||||||
return: ; preds = %entry
|
return: ; preds = %entry
|
||||||
ret void
|
ret void
|
||||||
; CHECK: memcmp4a:
|
; CHECK: memcmp4a:
|
||||||
; CHECK: cmpl $2021158767, (%rdi)
|
; CHECK: cmpl $1869573999, (%rdi)
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
|
||||||
|
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||||
|
br i1 %1, label %return, label %bb
|
||||||
|
|
||||||
|
bb: ; preds = %entry
|
||||||
|
store i32 4, i32* %P, align 4
|
||||||
|
ret void
|
||||||
|
|
||||||
|
return: ; preds = %entry
|
||||||
|
ret void
|
||||||
|
; CHECK: memcmp8:
|
||||||
|
; CHECK: movq (%rsi), %rax
|
||||||
|
; CHECK: cmpq %rax, (%rdi)
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
|
||||||
|
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||||
|
br i1 %1, label %return, label %bb
|
||||||
|
|
||||||
|
bb: ; preds = %entry
|
||||||
|
store i32 4, i32* %P, align 4
|
||||||
|
ret void
|
||||||
|
|
||||||
|
return: ; preds = %entry
|
||||||
|
ret void
|
||||||
|
; CHECK: memcmp8a:
|
||||||
|
; CHECK: movabsq $8029759185026510694, %rax
|
||||||
|
; CHECK: cmpq %rax, (%rdi)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user