MachineCopyPropagation: Remove the copies instead of using KILL instructions.

For some history here see the commit messages of r199797 and r169060.

The original intent was to fix cases like:

%EAX<def> = COPY %ECX<kill>, %RAX<imp-def>
%RCX<def> = COPY %RAX<kill>

where simply removing the copies would have RCX undefined as in terms of
machine operands only the ECX part of it is defined. The machine
verifier would complain about this so 169060 changed such COPY
instructions into KILL instructions so some super-register imp-defs
would be preserved. In r199797 it was finally decided to always do this
regardless of super-register defs.

But this is wrong, consider:
R1 = COPY R0
...
R0 = COPY R1
getting changed to:
R1 = KILL R0
...
R0 = KILL R1

It now looks like R0 dies at the first KILL and won't be alive until the
second KILL, while in reality R0 is alive and must not change in this
part of the program.

As this only happens after register allocation there is not much code
still performing liveness queries so the issue was not noticed.  In fact
I didn't manage to create a testcase for this, without unrelated changes
I am working on at the moment.

The fix is simple: As of r223896 the MachineVerifier allows reads from
partially defined registers, so the whole transforming COPY->KILL thing
is not necessary anymore. This patch also changes a similar (but more
benign case as the def and src are the same register) case in the
VirtRegRewriter.

Differential Revision: http://reviews.llvm.org/D10117

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238588 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matthias Braun 2015-05-29 18:19:25 +00:00
parent 2368d54c07
commit 3bd732d1ee
7 changed files with 9 additions and 44 deletions

View File

@ -54,7 +54,6 @@ namespace {
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
bool CopyPropagateBlock(MachineBasicBlock &MBB);
void removeCopy(MachineInstr *MI);
};
}
char MachineCopyPropagation::ID = 0;
@ -127,13 +126,6 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
return false;
}
// Remove MI from the function because it has been determined it is dead.
// Turn it into a noop KILL instruction as opposed to removing it to
// maintain imp-use/imp-def chains.
void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
MI->setDesc(TII->get(TargetOpcode::KILL));
}
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
@ -183,7 +175,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
I->clearRegisterKills(Def, TRI);
removeCopy(MI);
MI->eraseFromParent();
Changed = true;
++NumDeletes;
continue;
@ -291,7 +283,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
continue;
DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
(*DI)->dump());
removeCopy(*DI);
(*DI)->eraseFromParent();
Changed = true;
++NumDeletes;
}
@ -327,7 +319,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
removeCopy(*DI);
(*DI)->eraseFromParent();
Changed = true;
++NumDeletes;
}

View File

@ -417,17 +417,11 @@ void VirtRegRewriter::rewrite() {
// Finally, remove any identity copies.
if (MI->isIdentityCopy()) {
++NumIdCopies;
if (MI->getNumOperands() == 2) {
DEBUG(dbgs() << "Deleting identity copy.\n");
if (Indexes)
Indexes->removeMachineInstrFromMaps(MI);
// It's safe to erase MI because MII has already been incremented.
MI->eraseFromParent();
} else {
// Transform identity copy to a KILL to deal with subregisters.
MI->setDesc(TII->get(TargetOpcode::KILL));
DEBUG(dbgs() << "Identity copy: " << *MI);
}
DEBUG(dbgs() << "Deleting identity copy.\n");
if (Indexes)
Indexes->removeMachineInstrFromMaps(MI);
// It's safe to erase MI because MII has already been incremented.
MI->eraseFromParent();
}
}
}

View File

@ -116,11 +116,8 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
@ -130,11 +127,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
; SKX-LABEL: test10:
; SKX: ## BB#0:
@ -166,7 +160,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16

View File

@ -11,8 +11,7 @@
@NullToken = external global i64
; CHECK-LABEL: Part_Create:
; CHECK-DAG: # kill: RDI<def>
; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10
; CHECK: movq PartClass@GOTPCREL(%rip), %r10
define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
%PartObj = alloca i64*, align 8
%Vchunk = alloca i64, align 8

View File

@ -239,7 +239,6 @@ define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
; SSE2: # BB#0:
; SSE2-NEXT: movapd %xmm0, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
; SSE2-NEXT: subsd %xmm3, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
@ -589,7 +588,6 @@ define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
; SSE2-LABEL: fptoui_8vf32:
; SSE2: # BB#0:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: cvttss2si %xmm0, %rax
; SSE2-NEXT: movd %eax, %xmm0

View File

@ -843,7 +843,6 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT: retq

View File

@ -9,7 +9,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1
@ -19,7 +18,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1
@ -156,7 +154,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1
@ -166,7 +163,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1
@ -334,7 +330,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
@ -343,7 +338,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
@ -366,7 +360,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
@ -380,7 +373,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
@ -389,7 +381,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
@ -413,7 +404,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry: