[x32] Fix FrameIndex check in SelectLEA64_32Addr

Summary:
Fixes http://llvm.org/bugs/show_bug.cgi?id=20016, which is reproducible with
the new lea-5.ll test case.
Also use RSP/RBP as the base register for x32 lea, which saves the 1 byte
taken by the 0x67 prefix in the ESP/EBP case.

Test Plan: lea tests modified to include x32/nacl and new test added

Reviewers: nadav, dschuff, t.p.northover

Subscribers: llvm-commits, zinovy.nis

Differential Revision: http://reviews.llvm.org/D4929

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216065 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Pavel Chupin 2014-08-20 11:59:22 +00:00
parent 40f9d11ccc
commit aadaac228d
7 changed files with 78 additions and 3 deletions

View File

@ -1428,7 +1428,7 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
if (RN && RN->getReg() == 0)
Base = CurDAG->getRegister(0, MVT::i64);
else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
// Base could already be %rip, particularly in the x32 ABI.
Base = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,

View File

@ -489,6 +489,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
// For LEA64_32r, when BasePtr is a 32-bit register (X32), we can use the
// full-size 64-bit register as the source operand: the semantics are the same
// and the destination is still 32 bits. This saves one byte per lea because
// the 0x67 prefix is avoided.
if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);

View File

@ -1,4 +1,7 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux -x86-asm-syntax=intel | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -x86-asm-syntax=intel | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-nacl -x86-asm-syntax=intel | FileCheck %s
define i32 @test1(i32 %A, i32 %B) {
%tmp1 = shl i32 %A, 2

View File

@ -1,4 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax

View File

@ -1,4 +1,7 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
define zeroext i16 @t1(i32 %on_off) nounwind {
entry:

59
test/CodeGen/X86/lea-5.ll Normal file
View File

@ -0,0 +1,59 @@
; Test for more complicated forms of lea operands, which can be generated
; in loop-optimized cases.
; See also http://llvm.org/bugs/show_bug.cgi?id=20016
; RUN: llc < %s -mtriple=x86_64-linux -O2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O2 | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-nacl -O2 | FileCheck %s -check-prefix=X32
; Function Attrs: nounwind readnone uwtable
; @foo scans a stack-allocated [8 x i32] array, starting at index %d, and
; leaves the loop as soon as a loaded element equals zero. %x is unused.
; The FileCheck patterns inside the loop pin the lea forms expected for the
; array element address: leaq on the x86_64-linux run, leal on the gnux32 and
; nacl runs, in both cases with %rsp as the base register.
define void @foo(i32 %x, i32 %d) #0 {
entry:
; 8 x i32 array on the stack with 16-byte alignment.
%a = alloca [8 x i32], align 16
br label %while.cond
while.cond: ; preds = %while.cond, %entry
; Loop index: %d on entry, %inc on the back edge.
%d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
%arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
; Expected address form: displacement -40 off %rsp plus an index register
; scaled by 4.
; CHECK: leaq -40(%rsp,%r{{[^,]*}},4), %rax
; X32: leal -40(%rsp,%r{{[^,]*}},4), %eax
%0 = load i32* %arrayidx, align 4
%cmp1 = icmp eq i32 %0, 0
%inc = add nsw i32 %d.addr.0, 1
; A second lea with displacement 4 off a single register is expected next
; (presumably the advance to the next element; the patterns only pin the
; instruction form).
; CHECK: leaq 4(%r{{[^,]*}}), %r{{[^,]*}}
; X32: leal 4(%r{{[^,]*}}), %e{{[^,]*}}
; Exit when the loaded element is zero, otherwise iterate again.
br i1 %cmp1, label %while.end, label %while.cond
while.end: ; preds = %while.cond
ret void
}
; The same test as above but with enforced stack realignment (%a aligned to 64)
; to check one more case of correct lea generation.
; Function Attrs: nounwind readnone uwtable
; @bar has the same loop as @foo, but its array is allocated with 64-byte
; alignment. %x is unused. The expected lea for the element address uses
; %rsp as the base with no displacement: leaq on the x86_64-linux run, leal
; on the gnux32 and nacl runs.
define void @bar(i32 %x, i32 %d) #0 {
entry:
; 8 x i32 array on the stack with 64-byte alignment.
%a = alloca [8 x i32], align 64
br label %while.cond
while.cond: ; preds = %while.cond, %entry
; Loop index: %d on entry, %inc on the back edge.
%d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
%arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
; Expected address form: %rsp plus an index register scaled by 4, with no
; displacement.
; CHECK: leaq (%rsp,%r{{[^,]*}},4), %rax
; X32: leal (%rsp,%r{{[^,]*}},4), %eax
%0 = load i32* %arrayidx, align 4
%cmp1 = icmp eq i32 %0, 0
%inc = add nsw i32 %d.addr.0, 1
; A second lea with displacement 4 off a single register is expected next
; (presumably the advance to the next element; the patterns only pin the
; instruction form).
; CHECK: leaq 4(%r{{[^,]*}}), %r{{[^,]*}}
; X32: leal 4(%r{{[^,]*}}), %e{{[^,]*}}
; Exit when the loaded element is zero, otherwise iterate again.
br i1 %cmp1, label %while.end, label %while.cond
while.end: ; preds = %while.cond
ret void
}

View File

@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
define i32 @test1(i32 %x) nounwind {
%tmp1 = shl i32 %x, 3