mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-19 18:24:00 +00:00
Allow MachineCSE to coalesce trivial subregister copies the same way
that it coalesces normal copies.

Without this, MachineCSE is powerless to handle redundant operations with truncated source operands.

This required fixing the 2-addr pass to handle tied subregisters. It isn't clear what combinations of subregisters can legally be tied, but the simple case of truncated source operands is now safely handled:

    %vreg11<def> = COPY %vreg1:sub_32bit; GR32:%vreg11 GR64:%vreg1
    %vreg12<def> = COPY %vreg2:sub_32bit; GR32:%vreg12 GR64:%vreg2
    %vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197414 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -131,13 +131,18 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
|
|||||||
unsigned SrcReg = DefMI->getOperand(1).getReg();
|
unsigned SrcReg = DefMI->getOperand(1).getReg();
|
||||||
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
|
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
|
||||||
continue;
|
continue;
|
||||||
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
|
if (DefMI->getOperand(0).getSubReg())
|
||||||
continue;
|
continue;
|
||||||
if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
|
unsigned SrcSubReg = DefMI->getOperand(1).getSubReg();
|
||||||
|
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||||
|
if (SrcSubReg)
|
||||||
|
RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
|
||||||
|
SrcSubReg);
|
||||||
|
if (!MRI->constrainRegClass(SrcReg, RC))
|
||||||
continue;
|
continue;
|
||||||
DEBUG(dbgs() << "Coalescing: " << *DefMI);
|
DEBUG(dbgs() << "Coalescing: " << *DefMI);
|
||||||
DEBUG(dbgs() << "*** to: " << *MI);
|
DEBUG(dbgs() << "*** to: " << *MI);
|
||||||
MO.setReg(SrcReg);
|
MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
|
||||||
MRI->clearKillFlags(SrcReg);
|
MRI->clearKillFlags(SrcReg);
|
||||||
DefMI->eraseFromParent();
|
DefMI->eraseFromParent();
|
||||||
++NumCoalesces;
|
++NumCoalesces;
|
||||||
|
@ -1349,6 +1349,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
unsigned LastCopiedReg = 0;
|
unsigned LastCopiedReg = 0;
|
||||||
SlotIndex LastCopyIdx;
|
SlotIndex LastCopyIdx;
|
||||||
unsigned RegB = 0;
|
unsigned RegB = 0;
|
||||||
|
unsigned SubRegB = 0;
|
||||||
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
|
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
|
||||||
unsigned SrcIdx = TiedPairs[tpi].first;
|
unsigned SrcIdx = TiedPairs[tpi].first;
|
||||||
unsigned DstIdx = TiedPairs[tpi].second;
|
unsigned DstIdx = TiedPairs[tpi].second;
|
||||||
@ -1359,6 +1360,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
// Grab RegB from the instruction because it may have changed if the
|
// Grab RegB from the instruction because it may have changed if the
|
||||||
// instruction was commuted.
|
// instruction was commuted.
|
||||||
RegB = MI->getOperand(SrcIdx).getReg();
|
RegB = MI->getOperand(SrcIdx).getReg();
|
||||||
|
SubRegB = MI->getOperand(SrcIdx).getSubReg();
|
||||||
|
|
||||||
if (RegA == RegB) {
|
if (RegA == RegB) {
|
||||||
// The register is tied to multiple destinations (or else we would
|
// The register is tied to multiple destinations (or else we would
|
||||||
@ -1383,8 +1385,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Emit a copy.
|
// Emit a copy.
|
||||||
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
|
MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
|
||||||
TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
|
TII->get(TargetOpcode::COPY), RegA);
|
||||||
|
// If this operand is folding a truncation, the truncation now moves to the
|
||||||
|
// copy so that the register classes remain valid for the operands.
|
||||||
|
MIB.addReg(RegB, 0, SubRegB);
|
||||||
|
const TargetRegisterClass *RC = MRI->getRegClass(RegB);
|
||||||
|
if (SubRegB) {
|
||||||
|
if (TargetRegisterInfo::isVirtualRegister(RegA)) {
|
||||||
|
assert(TRI->getMatchingSuperRegClass(MRI->getRegClass(RegB),
|
||||||
|
MRI->getRegClass(RegA), SubRegB) &&
|
||||||
|
"tied subregister must be a truncation");
|
||||||
|
// The superreg class will not be used to constrain the subreg class.
|
||||||
|
RC = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
|
||||||
|
&& "tied subregister must be a truncation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update DistanceMap.
|
// Update DistanceMap.
|
||||||
MachineBasicBlock::iterator PrevMI = MI;
|
MachineBasicBlock::iterator PrevMI = MI;
|
||||||
@ -1404,7 +1423,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
|
DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
|
||||||
|
|
||||||
MachineOperand &MO = MI->getOperand(SrcIdx);
|
MachineOperand &MO = MI->getOperand(SrcIdx);
|
||||||
assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
|
assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
|
||||||
@ -1417,9 +1436,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
// Make sure regA is a legal regclass for the SrcIdx operand.
|
// Make sure regA is a legal regclass for the SrcIdx operand.
|
||||||
if (TargetRegisterInfo::isVirtualRegister(RegA) &&
|
if (TargetRegisterInfo::isVirtualRegister(RegA) &&
|
||||||
TargetRegisterInfo::isVirtualRegister(RegB))
|
TargetRegisterInfo::isVirtualRegister(RegB))
|
||||||
MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
|
MRI->constrainRegClass(RegA, RC);
|
||||||
|
|
||||||
MO.setReg(RegA);
|
MO.setReg(RegA);
|
||||||
|
MO.setSubReg(0);
|
||||||
|
|
||||||
// Propagate SrcRegMap.
|
// Propagate SrcRegMap.
|
||||||
SrcRegMap[RegA] = RegB;
|
SrcRegMap[RegA] = RegB;
|
||||||
@ -1431,12 +1450,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
|
|||||||
// Replace other (un-tied) uses of regB with LastCopiedReg.
|
// Replace other (un-tied) uses of regB with LastCopiedReg.
|
||||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||||
MachineOperand &MO = MI->getOperand(i);
|
MachineOperand &MO = MI->getOperand(i);
|
||||||
if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
|
if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
|
||||||
|
MO.isUse()) {
|
||||||
if (MO.isKill()) {
|
if (MO.isKill()) {
|
||||||
MO.setIsKill(false);
|
MO.setIsKill(false);
|
||||||
RemovedKillFlag = true;
|
RemovedKillFlag = true;
|
||||||
}
|
}
|
||||||
MO.setReg(LastCopiedReg);
|
MO.setReg(LastCopiedReg);
|
||||||
|
MO.setSubReg(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,8 +41,8 @@ declare void @bar(i64) nounwind
|
|||||||
|
|
||||||
define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
|
define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
|
||||||
; CHECK-LABEL: test3:
|
; CHECK-LABEL: test3:
|
||||||
; CHECK: cmovnel %edi, %esi
|
; CHECK: cmov{{n?}}el %[[R1:e..]], %[[R2:e..]]
|
||||||
; CHECK-NEXT: movl %esi, %edi
|
; CHECK-NEXT: movl %[[R2]], %[[R2]]
|
||||||
|
|
||||||
%c = trunc i64 %a to i32
|
%c = trunc i64 %a to i32
|
||||||
%d = trunc i64 %b to i32
|
%d = trunc i64 %b to i32
|
||||||
|
42
test/CodeGen/X86/cse-add-with-overflow.ll
Normal file
42
test/CodeGen/X86/cse-add-with-overflow.ll
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s
|
||||||
|
; rdar:15661073 simple example of redundant adds
|
||||||
|
;
|
||||||
|
; MachineCSE should coalesce trivial subregister copies.
|
||||||
|
;
|
||||||
|
; The extra movl+addl should be removed during MachineCSE.
|
||||||
|
; CHECK-LABEL: redundantadd
|
||||||
|
; CHECK: cmpq
|
||||||
|
; CHECK: movq
|
||||||
|
; CHECK-NOT: movl
|
||||||
|
; CHECK: addl
|
||||||
|
; CHECK-NOT: addl
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
define i64 @redundantadd(i64* %a0, i64* %a1) {
|
||||||
|
entry:
|
||||||
|
%tmp8 = load i64* %a0, align 8
|
||||||
|
%tmp12 = load i64* %a1, align 8
|
||||||
|
%tmp13 = icmp ult i64 %tmp12, -281474976710656
|
||||||
|
br i1 %tmp13, label %exit1, label %body
|
||||||
|
|
||||||
|
exit1:
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
body:
|
||||||
|
%tmp14 = trunc i64 %tmp8 to i32
|
||||||
|
%tmp15 = trunc i64 %tmp12 to i32
|
||||||
|
%tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15)
|
||||||
|
%tmp17 = extractvalue { i32, i1 } %tmp16, 1
|
||||||
|
br i1 %tmp17, label %exit2, label %return
|
||||||
|
|
||||||
|
exit2:
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
return:
|
||||||
|
%tmp18 = add i64 %tmp12, %tmp8
|
||||||
|
%tmp19 = and i64 %tmp18, 4294967295
|
||||||
|
%tmp20 = or i64 %tmp19, -281474976710656
|
||||||
|
ret i64 %tmp20
|
||||||
|
}
|
||||||
|
|
||||||
|
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
|
Reference in New Issue
Block a user