Handle equality memcmp of 8 bytes on x86-64 with two unaligned loads and a compare.

On other targets we end up with a call to memcmp because we don't want 16 individual byte loads. We should be able to use movups for 16-byte compares as well, but we're failing to select the generated icmp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92107 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-14 14:24:05 +00:00 · 2009-12-24 01:07:17 +00:00
parent 7ed6dd61ac
commit 04b091a782
2 changed files with 93 additions and 23 deletions
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -5092,17 +5092,8 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
  return true;
 }
-static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size,
+static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
                             SelectionDAGBuilder &Builder) {
  MVT LoadVT;
  const Type *LoadTy;
  if (Size == 2) {
    LoadVT = MVT::i16;
    LoadTy = Type::getInt16Ty(PtrVal->getContext());
  } else {
    LoadVT = MVT::i32;
    LoadTy = Type::getInt32Ty(PtrVal->getContext()); 
  }
  // Check to see if this load can be trivially constant folded, e.g. if the
  // input is from a string literal.
@ -5158,10 +5149,54 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
-  if (Size && (Size->getValue() == 2 || Size->getValue() == 4) &&
+  if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
-      IsOnlyUsedInZeroEqualityComparison(&I)) {
+    bool ActuallyDoIt = true;
-    SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this);
+    MVT LoadVT;
-    SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this);
+    const Type *LoadTy;
    switch (Size->getZExtValue()) {
    default:
      LoadVT = MVT::Other;
      LoadTy = 0;
      ActuallyDoIt = false;
      break;
    case 2:
      LoadVT = MVT::i16;
      LoadTy = Type::getInt16Ty(Size->getContext());
      break;
    case 4:
      LoadVT = MVT::i32;
      LoadTy = Type::getInt32Ty(Size->getContext()); 
      break;
    case 8:
      LoadVT = MVT::i64;
      LoadTy = Type::getInt64Ty(Size->getContext()); 
      break;
        /*
    case 16:
      LoadVT = MVT::v4i32;
      LoadTy = Type::getInt32Ty(Size->getContext()); 
      LoadTy = VectorType::get(LoadTy, 4);
      break;
         */
    }
    // This turns into unaligned loads.  We only do this if the target natively
    // supports the MVT we'll be loading or if it is small enough (<= 4) that
    // we'll only produce a small number of byte loads.
    // Require that we can find a legal MVT, and only do this if the target
    // supports unaligned loads of that type.  Expanding into byte loads would
    // bloat the code.
    if (ActuallyDoIt && Size->getZExtValue() > 4) {
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
      if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
        ActuallyDoIt = false;
    }
    if (ActuallyDoIt) {
      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
      SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
                                 ISD::SETNE);
@ -5169,6 +5204,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
      setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
      return true;
    }
  }
  return false;
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@ -3,7 +3,7 @@
 ; This tests codegen time inlining/optimization of memcmp
 ; rdar://6480398
-@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1]
 declare i32 @memcmp(...)
@ -26,7 +26,7 @@ return:                                           ; preds = %entry
 define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
 entry:
-  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
  br i1 %1, label %return, label %bb
@ -60,7 +60,7 @@ return:                                           ; preds = %entry
 define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
 entry:
-  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
  br i1 %1, label %return, label %bb
@ -71,6 +71,40 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
  ret void
 ; CHECK: memcmp4a:
-; CHECK: cmpl $2021158767, (%rdi)
+; CHECK: cmpl $1869573999, (%rdi)
 }
 ; memcmp8: 8-byte memcmp of two unknown pointers (%X and %Y) whose result is
 ; used only in an equality comparison against zero. The expected x86-64
 ; output listed at the end of the function is one 64-bit load of the second
 ; operand followed by a cmpq of that register against the first operand in
 ; memory.
 define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
 entry:
  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
  br i1 %1, label %return, label %bb
 bb:                                               ; preds = %entry
  ; Reached only when the two 8-byte regions differ.
  store i32 4, i32* %P, align 4
  ret void
 return:                                           ; preds = %entry
  ret void
 ; CHECK: memcmp8:
 ; CHECK: movq    (%rsi), %rax
 ; CHECK: cmpq    %rax, (%rdi)
 }
 ; memcmp8a: 8-byte equality memcmp of %X against the first 8 bytes of the
 ; constant string @.str ("fooooooo", starting at element 0). The load from
 ; the constant is expected to fold to an immediate: 8029759185026510694 is
 ; 0x6F6F6F6F6F6F6F66, i.e. the bytes 'f','o','o','o','o','o','o','o' read as
 ; a little-endian 64-bit integer, which is then compared against (%rdi).
 define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
 entry:
  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
  br i1 %1, label %return, label %bb
 bb:                                               ; preds = %entry
  ; Reached only when the first 8 bytes at %X differ from the string prefix.
  store i32 4, i32* %P, align 4
  ret void
 return:                                           ; preds = %entry
  ret void
 ; CHECK: memcmp8a:
 ; CHECK: movabsq $8029759185026510694, %rax
 ; CHECK: cmpq	%rax, (%rdi)
 }