From e3b23cde80b19507f1d8b641a541e91ace0864dc Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Mon, 2 Apr 2012 22:30:39 +0000
Subject: [PATCH] Allocate virtual registers in ascending order.

This is just the fallback tie-breaker ordering, the main allocation order
is still descending size.

Patch by Shamil Kurmangaleev!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153904 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/RegAllocGreedy.cpp          |  4 ++--
 test/CodeGen/ARM/ldrd.ll                |  3 +--
 test/CodeGen/ARM/reg_asc_order.ll       | 16 ++++++++++++++++
 test/CodeGen/ARM/select.ll              | 10 +++++-----
 test/CodeGen/X86/store_op_load_fold2.ll | 14 ++++++++++----
 test/CodeGen/X86/vec_shuffle-37.ll      |  8 ++++----
 6 files changed, 38 insertions(+), 17 deletions(-)
 create mode 100644 test/CodeGen/ARM/reg_asc_order.ll

diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index f29a85287ba..d00259a6d91 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -428,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) {
       Prio |= (1u << 30);
   }

-  Queue.push(std::make_pair(Prio, Reg));
+  Queue.push(std::make_pair(Prio, ~Reg));
 }

 LiveInterval *RAGreedy::dequeue() {
   if (Queue.empty())
     return 0;
-  LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+  LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
   Queue.pop();
   return LI;
 }
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index a588bc31535..3f8fd75f49f 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -44,8 +44,7 @@ entry:
 ; BASIC: str
 ; GREEDY: @f
 ; GREEDY: %bb
-; GREEDY: ldr
-; GREEDY: ldr
+; GREEDY: ldrd
 ; GREEDY: str
 define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 00000000000..d1d0ee5f3e7
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very simple case.
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+;CHECK: ldm
+;CHECK: stm
+ %0 = bitcast %struct.Foo* %a to i8*
+ %1 = bitcast %struct.Foo* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index f1bd7ee53f8..3e07da841a5 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,12 @@ define double @f7(double %a, double %b) {
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
 ; CHECK-NEON: _f8:
-; CHECK-NEON: adr r2, LCPI7_0
-; CHECK-NEON-NEXT: movw r3, #1123
-; CHECK-NEON-NEXT: adds r1, r2, #4
-; CHECK-NEON-NEXT: cmp r0, r3
+; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON-NEXT: movw [[R3:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: adds {{r.*}}, [[R2]], #4
+; CHECK-NEON-NEXT: cmp r0, [[R3]]
 ; CHECK-NEON-NEXT: it ne
-; CHECK-NEON-NEXT: movne r1, r2
+; CHECK-NEON-NEXT: movne {{r.*}}, [[R2]]
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON: bx
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 11686227ab9..8313166a90c 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
 target datalayout = "e-p:32:32"
 %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -16,9 +17,14 @@ cond_true2732.preheader:        ; preds = %entry
 store i64 %tmp2676.us.us, i64* %tmp2666
 ret i32 0

-; CHECK: and {{E..}}, DWORD PTR [360]
-; CHECK: and DWORD PTR [356], {{E..}}
-; CHECK: mov DWORD PTR [360], {{E..}}
+; INTEL: and {{E..}}, DWORD PTR [360]
+; INTEL: and DWORD PTR [356], {{E..}}
+; FIXME: mov DWORD PTR [360], {{E..}}
+; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+
+; ATT: andl 360, %{{e..}}
+; ATT: andl %{{e..}}, 356
+; ATT: movl %{{e..}}, 360
 }
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 65486cb80c9..619652aff15 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -4,10 +4,10 @@
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
-; CHECK: movaps ({{%rdi|%rcx}}), %xmm0
-; CHECK: movaps %xmm0, %xmm1
-; CHECK-NEXT: movss %xmm2, %xmm1
-; CHECK-NEXT: shufps $36, %xmm1, %xmm0
+; CHECK: movaps ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
+; CHECK: movaps %[[XMM0]], %[[XMM1:xmm[0-9]+]]
+; CHECK-NEXT: movss %xmm{{[0-9]+}}, %[[XMM1]]
+; CHECK-NEXT: shufps $36, %[[XMM1]], %[[XMM0]]
 %0 = load <4 x i32>* undef, align 16
 %1 = load <4 x i32>* %a0, align 16
 %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32>
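
A note on the RegAllocGreedy.cpp change above: the allocation queue is a max-heap of (priority, value) pairs, and pairs with equal priority are ordered by their second member. Pushing ~Reg instead of Reg inverts that secondary comparison, so among live ranges of equal priority the smallest virtual register number is popped first, which is the ascending tie-break order described in the commit message; dequeue() applies ~ again to recover the register. Below is a minimal standalone sketch of the trick. It uses a plain std::priority_queue with made-up priorities and register indices rather than the actual LLVM types, so it only illustrates the ordering, not the allocator itself.

#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

int main() {
  // Max-heap of (priority, complemented index) pairs; pairs compare
  // lexicographically, so .second breaks ties between equal priorities.
  std::priority_queue<std::pair<unsigned, unsigned> > Queue;

  // Three made-up entries share priority 100, one has priority 200.
  std::vector<unsigned> Regs;
  Regs.push_back(7);
  Regs.push_back(3);
  Regs.push_back(5);
  for (unsigned i = 0; i != Regs.size(); ++i)
    Queue.push(std::make_pair(100u, ~Regs[i])); // complement => ascending ties
  Queue.push(std::make_pair(200u, ~9u));        // higher priority, popped first

  while (!Queue.empty()) {
    unsigned Prio = Queue.top().first;
    unsigned Reg  = ~Queue.top().second;        // undo the complement, as dequeue() does
    Queue.pop();
    std::printf("prio %u  vreg %u\n", Prio, Reg);
  }
  // Prints the prio-200 entry first, then the prio-100 entries as 3, 5, 7:
  // smaller indices win ties because ~3 > ~5 > ~7 as unsigned values.
  return 0;
}

The same ordering could also be obtained with a custom comparator on the queue; complementing the index is simply cheaper and leaves the existing pair-based heap unchanged.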