Re-enable elimination of unnecessary SUBREG_TO_REG instructions in
LowerSubregs, and fix an x86-64 isel bug that this exposed.

SUBREG_TO_REG for x86-64 implicit zero extension is only safe for
isel to generate when the source is known to always have zeros in
the high 32 bits. The EXTRACT_SUBREG instruction does not clear
the high 32 bits.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54444 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2008-08-07 02:54:50 +00:00
parent 865db4566e
commit e3d920699c
5 changed files with 46 additions and 12 deletions

View File

@ -108,15 +108,20 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
DOUT << "subreg: CONVERTING: " << *MI; DOUT << "subreg: CONVERTING: " << *MI;
// Insert sub-register copy if (DstSubReg == InsReg) {
const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); // No need to insert an identity copy instruction.
const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); DOUT << "subreg: eliminated!";
TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); } else {
// Insert sub-register copy
const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
#ifndef NDEBUG #ifndef NDEBUG
MachineBasicBlock::iterator dMI = MI; MachineBasicBlock::iterator dMI = MI;
DOUT << "subreg: " << *(--dMI); DOUT << "subreg: " << *(--dMI);
#endif #endif
}
DOUT << "\n"; DOUT << "\n";
MBB->erase(MI); MBB->erase(MI);

View File

@ -258,6 +258,18 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. We need this because the seeming alternative for
// implementing zext from 32 to 64, an EXTRACT_SUBREG/SUBREG_TO_REG pair, isn't
// safe because both instructions could be optimized away in the
// register-to-register case, leaving nothing behind to do the zero extension.
//
// Register-to-register form: selected for (zext GR32) producing a GR64 result.
// The assembly string prints the destination through the subreg32 modifier,
// so the emitted text is a mov{l} between 32-bit register names.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
"mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, (zext GR32:$src))]>;
// Memory-to-register form: selected for a zero-extending 32-bit load
// (zextloadi64i32) into a GR64 result; printed the same way, with the source
// being an i32mem operand.
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in {
let Defs = [RAX], Uses = [EAX] in let Defs = [RAX], Uses = [EAX] in
def CDQE : RI<0x98, RawFrm, (outs), (ins), def CDQE : RI<0x98, RawFrm, (outs), (ins),
@ -1213,9 +1225,6 @@ def : Pat<(i64 (zext GR32:$src)),
// zextload bool -> zextload byte // zextload bool -> zextload byte
def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
def : Pat<(zextloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), x86_subreg_32bit)>;
// extload // extload
def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
@ -1240,11 +1249,9 @@ def : Pat<(i64 (anyext (loadi32 addr:$src))),
// Some peepholes // Some peepholes
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// r & (2^32-1) ==> mov32 + implicit zext // r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, i64immFFFFFFFF), def : Pat<(and GR64:$src, i64immFFFFFFFF),
(SUBREG_TO_REG (i64 0), (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
(i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)),
x86_subreg_32bit)>;
// r & (2^16-1) ==> movz // r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff), def : Pat<(and GR64:$src, 0xffff),
(MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;

View File

@ -396,6 +396,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX32rr16, X86::MOVZX32rm16 }, { X86::MOVZX32rr16, X86::MOVZX32rm16 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8 }, { X86::MOVZX32rr8, X86::MOVZX32rm8 },
{ X86::MOVZX64rr16, X86::MOVZX64rm16 }, { X86::MOVZX64rr16, X86::MOVZX64rm16 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8 }, { X86::MOVZX64rr8, X86::MOVZX64rm8 },
{ X86::PSHUFDri, X86::PSHUFDmi }, { X86::PSHUFDri, X86::PSHUFDmi },
{ X86::PSHUFHWri, X86::PSHUFHWmi }, { X86::PSHUFHWri, X86::PSHUFHWmi },

View File

@ -0,0 +1,11 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
; Do eliminate the zero-extension instruction and rely on
; x86-64's implicit zero-extension!
;
; The RUN line pipes the x86-64 assembly through grep and expects exactly one
; line matching "mov". NOTE(review): presumably that single match is the load
; of %t, meaning no separate instruction is emitted for the zext -- confirm
; against the actual llc output.
define i64 @foo(i32* %p) nounwind {
; Load a 32-bit value from %p.
%t = load i32* %p
; 32-bit add; its result is the operand of the zero-extension below.
%n = add i32 %t, 1
; Zero-extend the 32-bit sum to 64 bits; this is the extension the test
; expects to be folded away.
%z = zext i32 %n to i64
ret i64 %z
}

View File

@ -0,0 +1,10 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep {movl %e.\*, %e.\*} | count 1
; Don't eliminate or coalesce away the explicit zero-extension!
;
; The RUN line expects exactly one line of x86-64 assembly matching a movl
; whose two operands are both registers named %e...
define i64 @foo(i64 %a) {
; 64-bit add of 4294967295 (2^32 - 1). NOTE(review): presumably here so the
; value being masked can have non-zero high 32 bits -- confirm intent.
%b = add i64 %a, 4294967295
; Mask with 2^32 - 1, keeping only the low 32 bits; this is the zero-extension
; that must survive as an explicit instruction.
%c = and i64 %b, 4294967295
; A further 64-bit add that consumes the masked value.
%d = add i64 %c, 1
ret i64 %d
}