[PPC64LE] Remove implicit-subreg restriction from VSX swap removal

In r241285, I removed the SUBREG_TO_REG restriction from VSX swap
removal, determining that this was overly conservative.  We have
another form of the same restriction in that we check for the presence
of implicit subregs in vector operations.  As with SUBREG_TO_REG for
partial register conversions, an implicit subreg is safe in and of
itself, provided no other operation makes a lane-sensitive assumption
about the result.  This patch removes that restriction, by removing
the HasImplicitSubreg flag and all code that relies on it.

I've added a test case that fails to optimize before this patch is
applied, and optimizes properly with the patch.  Test based on a
report from Anton Blanchard.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241290 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2015-07-02 19:01:22 +00:00
parent 1e09eba39d
commit 397fac95d5
2 changed files with 33 additions and 26 deletions

View File

@ -79,7 +79,6 @@ struct PPCVSXSwapEntry {
unsigned int IsStore : 1; unsigned int IsStore : 1;
unsigned int IsSwap : 1; unsigned int IsSwap : 1;
unsigned int MentionsPhysVR : 1; unsigned int MentionsPhysVR : 1;
unsigned int HasImplicitSubreg : 1;
unsigned int IsSwappable : 1; unsigned int IsSwappable : 1;
unsigned int SpecialHandling : 3; unsigned int SpecialHandling : 3;
unsigned int WebRejected : 1; unsigned int WebRejected : 1;
@ -224,7 +223,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (MachineInstr &MI : MBB) { for (MachineInstr &MI : MBB) {
bool RelevantInstr = false; bool RelevantInstr = false;
bool ImplicitSubreg = false;
for (const MachineOperand &MO : MI.operands()) { for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) if (!MO.isReg())
@ -232,8 +230,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
unsigned Reg = MO.getReg(); unsigned Reg = MO.getReg();
if (isVecReg(Reg)) { if (isVecReg(Reg)) {
RelevantInstr = true; RelevantInstr = true;
if (MO.getSubReg() != 0)
ImplicitSubreg = true;
break; break;
} }
} }
@ -249,9 +245,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
PPCVSXSwapEntry SwapEntry{}; PPCVSXSwapEntry SwapEntry{};
int VecIdx = addSwapEntry(&MI, SwapEntry); int VecIdx = addSwapEntry(&MI, SwapEntry);
if (ImplicitSubreg)
SwapVector[VecIdx].HasImplicitSubreg = 1;
switch(MI.getOpcode()) { switch(MI.getOpcode()) {
default: default:
// Unless noted otherwise, an instruction is considered // Unless noted otherwise, an instruction is considered
@ -466,23 +459,19 @@ int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
// such operations to the ultimate source register. If a // such operations to the ultimate source register. If a
// physical register is encountered, we stop the search and // physical register is encountered, we stop the search and
// flag the swap entry indicated by VecIdx (the original // flag the swap entry indicated by VecIdx (the original
// XXPERMDI) as mentioning a physical register. Similarly // XXPERMDI) as mentioning a physical register.
// for implicit subregister mentions (which should never
// happen).
unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
unsigned VecIdx) { unsigned VecIdx) {
MachineInstr *MI = MRI->getVRegDef(SrcReg); MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike()) if (!MI->isCopyLike())
return SrcReg; return SrcReg;
unsigned CopySrcReg, CopySrcSubreg; unsigned CopySrcReg;
if (MI->isCopy()) { if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg(); CopySrcReg = MI->getOperand(1).getReg();
CopySrcSubreg = MI->getOperand(1).getSubReg(); else {
} else {
assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg(); CopySrcReg = MI->getOperand(2).getReg();
CopySrcSubreg = MI->getOperand(2).getSubReg();
} }
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) { if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
@ -490,11 +479,6 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
return CopySrcReg; return CopySrcReg;
} }
if (CopySrcSubreg != 0) {
SwapVector[VecIdx].HasImplicitSubreg = 1;
return CopySrcReg;
}
return lookThruCopyLike(CopySrcReg, VecIdx); return lookThruCopyLike(CopySrcReg, VecIdx);
} }
@ -561,11 +545,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId); int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
// Reject webs containing mentions of physical registers or implicit // Reject webs containing mentions of physical registers, or containing
// subregs, or containing operations that we don't know how to handle // operations that we don't know how to handle in a lane-permuted region.
// in a lane-permuted region.
if (SwapVector[EntryIdx].MentionsPhysVR || if (SwapVector[EntryIdx].MentionsPhysVR ||
SwapVector[EntryIdx].HasImplicitSubreg ||
!(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) { !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
SwapVector[Repr].WebRejected = 1; SwapVector[Repr].WebRejected = 1;
@ -774,8 +756,6 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "swap "); DEBUG(dbgs() << "swap ");
if (SwapVector[EntryIdx].MentionsPhysVR) if (SwapVector[EntryIdx].MentionsPhysVR)
DEBUG(dbgs() << "physreg "); DEBUG(dbgs() << "physreg ");
if (SwapVector[EntryIdx].HasImplicitSubreg)
DEBUG(dbgs() << "implsubreg ");
if (SwapVector[EntryIdx].IsSwappable) { if (SwapVector[EntryIdx].IsSwappable) {
DEBUG(dbgs() << "swappable "); DEBUG(dbgs() << "swappable ");

View File

@ -0,0 +1,27 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s

; This test verifies that VSX swap optimization works when an implicit
; subregister is present (in this case, in the XXPERMDI associated with
; the store).
;
; The function stores a constant <2 x i64> into a 16-byte-aligned stack
; slot and hands a pointer to the slot's first i64 element to the external
; function @foo, so the vector store cannot be dropped.

define void @bar() {
entry:
  %x = alloca <2 x i64>, align 16
  %0 = bitcast <2 x i64>* %x to i8*
  call void @llvm.lifetime.start(i64 16, i8* %0)
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %x, i64 0, i64 0
  store <2 x i64> <i64 0, i64 1>, <2 x i64>* %x, align 16
  call void @foo(i64* %arrayidx)
  call void @llvm.lifetime.end(i64 16, i8* %0)
  ret void
}

; The swap prohibition is stated twice on purpose.  A negative directive
; only covers the output between its neighbouring positive matches, so the
; final one alone would scan just the text after the store.  The first one
; covers the region between the vector load and the vector store, where a
; swap left behind by the pass would presumably be emitted.

; CHECK-LABEL: @bar
; CHECK: lxvd2x
; CHECK-NOT: xxswapd
; CHECK: stxvd2x
; CHECK-NOT: xxswapd

declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @foo(i64*)
declare void @llvm.lifetime.end(i64, i8* nocapture)