From e8838d5c5f6cae7daa2507a114c896dc5a4ae097 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 25 Jan 2012 23:36:27 +0000 Subject: [PATCH] Improve sub-register def handling in ProcessImplicitDefs. This boils down to using MachineOperand::readsReg() more. This fixes PR11829 where a use ended up after the first def when lowering REG_SEQUENCE instructions involving IMPLICIT_DEFs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ProcessImplicitDefs.cpp | 19 ++++++++------- .../ARM/2012-01-24-RegSequenceLiveRange.ll | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b1d8c976022..30a9776f422 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -50,10 +50,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, SmallSet<unsigned, 8> &ImpDefRegs) { switch(OpIdx) { case 1: - return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isCopy() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); case 2: - return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); default: return false; } @@ -66,7 +66,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, MachineOperand &MO1 = MI->getOperand(1); if (MO1.getReg() != Reg) return false; - if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) return true; return false; } @@ -105,7 +105,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { - if (MI->getOperand(0).getSubReg()) + ImpDefMIs.push_back(MI); + // Is this a sub-register read-modify-write? 
+ if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); @@ -113,12 +115,11 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } - ImpDefMIs.push_back(MI); continue; } // Eliminate %reg1032:sub = COPY undef. - if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { @@ -140,7 +141,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -172,10 +173,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other uses of + // Make sure other reads of Reg are also marked <undef>. 
for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index faf65b60be8..872eca34ad1 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing ; PR11841 +; PR11829 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv7-none-linux-eabi" @@ -37,7 +38,30 @@ bb: ret void } +define arm_aapcs_vfpcc void @foo2() nounwind uwtable { +entry: + br i1 undef, label %for.end, label %cond.end295 + +cond.end295: ; preds = %entry + %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1> + %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float> + %1 = bitcast <4 x float> undef to <2 x i64> + %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + unreachable + +for.end: ; preds = %entry + ret void +} + declare 
arm_aapcs_vfpcc void @bar(i8*, float, float, float) +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null}