From d8319655f209cf2605a61d4a60b779e51baa29cc Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed.bougacha@gmail.com>
Date: Fri, 22 May 2015 21:37:17 +0000
Subject: [PATCH] [AArch64][CGP] Sink zext feeding stxr/stlxr into the same
 block.

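Apply the usual CodeGenPrepare sinking trick to a target-specific
intrinsic: a single-use zext that feeds aarch64 stxr/stlxr from another
basic block is moved to just before the intrinsic call.
Without this, the expansion of atomics usually leaves the zext hoisted
out of the loop, which defeats the patterns we have to catch this
precise case (folding the zext into the store-exclusive).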

Differential Revision: http://reviews.llvm.org/D9930


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238054 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CodeGenPrepare.cpp       | 10 ++++++++++
 test/CodeGen/AArch64/arm64-atomic.ll |  6 ++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index f37a2874b25..cf2b0a29b84 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1397,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
       }
       return false;
     }
+    case Intrinsic::aarch64_stlxr:
+    case Intrinsic::aarch64_stxr: {
+      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+      if (!ExtVal || !ExtVal->hasOneUse() ||
+          ExtVal->getParent() == CI->getParent())
+        return false;
+      // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+      ExtVal->moveBefore(CI);
+      return true;
+    }
     }
 
     if (TLI) {
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index fa07e9f2e91..9136fb6271b 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -2,12 +2,11 @@
 
 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap:
-; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
 ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
 ; CHECK-NEXT: cmp    [[RESULT]], w1
 ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
+; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w2, [x0]
 ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK-NEXT: [[LABEL2]]:
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
@@ -17,12 +16,11 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 
 define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap_rel:
-; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
 ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
 ; CHECK-NEXT: cmp    [[RESULT]], w1
 ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
+; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w2, [x0]
 ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK-NEXT: [[LABEL2]]:
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic