From e3d920699c6df959f3e0844aeadd983b2955b23e Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 7 Aug 2008 02:54:50 +0000 Subject: [PATCH] Re-enable elimination of unnecessary SUBREG_TO_REG instructions in LowerSubregs, and fix an x86-64 isel bug that this exposed. SUBREG_TO_REG for x86-64 implicit zero extension is only safe for isel to generate when the source is known to always have zeros in the high 32 bits. The EXTRACT_SUBREG instruction does not clear the high 32 bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54444 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LowerSubregs.cpp | 13 +++++++++---- lib/Target/X86/X86Instr64bit.td | 23 +++++++++++++++-------- lib/Target/X86/X86InstrInfo.cpp | 1 + test/CodeGen/X86/subreg-to-reg-0.ll | 11 +++++++++++ test/CodeGen/X86/subreg-to-reg-1.ll | 10 ++++++++++ 5 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/X86/subreg-to-reg-0.ll create mode 100644 test/CodeGen/X86/subreg-to-reg-1.ll diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 1cdd34b77f4..30894ad6ad7 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -108,15 +108,20 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { DOUT << "subreg: CONVERTING: " << *MI; - // Insert sub-register copy - const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); - const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); - TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + if (DstSubReg == InsReg) { + // No need to insert an identity copy instruction. 
+ DOUT << "subreg: eliminated!"; + } else { + // Insert sub-register copy + const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); + const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); #ifndef NDEBUG MachineBasicBlock::iterator dMI = MI; DOUT << "subreg: " << *(--dMI); #endif + } DOUT << "\n"; MBB->erase(MI); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index a187f86a107..f891a9e818b 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -258,6 +258,18 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; +// There's no movzlq instruction, but movl can be used for this purpose, using +// implicit zero-extension. We need this because the seeming alternative for +// implementing zext from 32 to 64, an EXTRACT_SUBREG/SUBREG_TO_REG pair, isn't +// safe because both instructions could be optimized away in the +// register-to-register case, leaving nothing behind to do the zero extension. 
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), + "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zext GR32:$src))]>; +def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + let neverHasSideEffects = 1 in { let Defs = [RAX], Uses = [EAX] in def CDQE : RI<0x98, RawFrm, (outs), (ins), @@ -1213,9 +1225,6 @@ def : Pat<(i64 (zext GR32:$src)), // zextload bool -> zextload byte def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(zextloadi64i32 addr:$src), - (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), x86_subreg_32bit)>; - // extload def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; @@ -1240,11 +1249,9 @@ def : Pat<(i64 (anyext (loadi32 addr:$src))), // Some peepholes //===----------------------------------------------------------------------===// -// r & (2^32-1) ==> mov32 + implicit zext -def : Pat<(and GR64:$src, i64immFFFFFFFF), - (SUBREG_TO_REG (i64 0), - (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)), - x86_subreg_32bit)>; +// r & (2^32-1) ==> movz +def : Pat<(and GR64:$src, i64immFFFFFFFF), + (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 61dcfa95b3f..a871fa8dc43 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -396,6 +396,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVZX32rr16, X86::MOVZX32rm16 }, { X86::MOVZX32rr8, X86::MOVZX32rm8 }, { X86::MOVZX64rr16, X86::MOVZX64rm16 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32 }, { X86::MOVZX64rr8, X86::MOVZX64rm8 }, { X86::PSHUFDri, X86::PSHUFDmi }, { X86::PSHUFHWri, 
X86::PSHUFHWmi }, diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll new file mode 100644 index 00000000000..6b60f652659 --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-0.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1 + +; Do eliminate the zero-extension instruction and rely on +; x86-64's implicit zero-extension! + +define i64 @foo(i32* %p) nounwind { + %t = load i32* %p + %n = add i32 %t, 1 + %z = zext i32 %n to i64 + ret i64 %z +} diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll new file mode 100644 index 00000000000..4e487e18472 --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-1.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep {movl %e.\*, %e.\*} | count 1 + +; Don't eliminate or coalesce away the explicit zero-extension! + +define i64 @foo(i64 %a) { + %b = add i64 %a, 4294967295 + %c = and i64 %b, 4294967295 + %d = add i64 %c, 1 + ret i64 %d +}