Careful with reg_sequence coalescing not to overwrite sub-register indices.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103971 91177308-0d34-0410-b5e6-96231b3b80d8

parent 7f43fd84db
commit 53c779bb3a
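
In short: EliminateRegSequences rewrites uses of each REG_SEQUENCE source register as sub-register references of the destination register, and the new CoalesceExtSubRegs helper below extends this to EXTRACT_SUBREG sources. The fix is to abort that rewrite whenever it would overwrite a sub-register index a use already carries. A minimal standalone sketch of the rule, using stand-in types (Operand and coalesceSrcUses are illustrative, not LLVM's MachineOperand API):

// Minimal standalone sketch of the rule this patch enforces; the types and
// names are illustrative stand-ins, not LLVM's API.
#include <vector>

struct Operand {
  unsigned Reg;    // virtual register number
  unsigned SubReg; // sub-register index; 0 means the whole register
};

// Rewrites every use of SrcReg to DstReg:NewSubIdx, but refuses (returning
// false and touching nothing) if NewSubIdx is a proper sub-register index
// and some use of SrcReg already carries its own sub-register index, since
// the rewrite would silently overwrite it.
static bool coalesceSrcUses(std::vector<Operand> &Ops, unsigned SrcReg,
                            unsigned DstReg, unsigned NewSubIdx) {
  if (NewSubIdx)
    for (const Operand &MO : Ops)
      if (MO.Reg == SrcReg && MO.SubReg != 0)
        return false; // existing index would be clobbered; abort
  for (Operand &MO : Ops)
    if (MO.Reg == SrcReg) {
      MO.Reg = DstReg;
      if (NewSubIdx)
        MO.SubReg = NewSubIdx;
    }
  return true;
}

The committed code applies the same check-then-rewrite discipline over MachineRegisterInfo's use lists via the Proceed flag in CoalesceExtSubRegs below.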
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -128,6 +128,8 @@ namespace {
     void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
                      SmallPtrSet<MachineInstr*, 8> &Processed);
 
+    void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
     /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
     /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
     /// sub-register references of the register defined by REG_SEQUENCE.
@@ -1132,7 +1134,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
 }
 
 static void UpdateRegSequenceSrcs(unsigned SrcReg,
-                                  unsigned DstReg, unsigned SrcIdx,
+                                  unsigned DstReg, unsigned SubIdx,
                                   MachineRegisterInfo *MRI) {
   for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
        RE = MRI->reg_end(); RI != RE; ) {
@@ -1140,7 +1142,77 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg,
     ++RI;
     MO.setReg(DstReg);
     assert(MO.getSubReg() == 0);
-    MO.setSubReg(SrcIdx);
+    MO.setSubReg(SubIdx);
   }
 }
 
+/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
+/// EXTRACT_SUBREG from the same register and to the same virtual register
+/// with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+                                              unsigned DstReg) {
+  SmallSet<unsigned, 4> Seen;
+  for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+    unsigned SrcReg = Srcs[i];
+    if (!Seen.insert(SrcReg))
+      continue;
+
+    // If there are no other uses than extract_subreg which feed into
+    // the reg_sequence, then we might be able to coalesce them.
+    bool CanCoalesce = true;
+    SmallVector<unsigned, 4> SubIndices;
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UI = MRI->use_nodbg_begin(SrcReg),
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+      MachineInstr *UseMI = &*UI;
+      if (!UseMI->isExtractSubreg() ||
+          UseMI->getOperand(0).getReg() != DstReg) {
+        CanCoalesce = false;
+        break;
+      }
+      SubIndices.push_back(UseMI->getOperand(2).getImm());
+    }
+
+    if (!CanCoalesce || SubIndices.size() < 2)
+      continue;
+
+    std::sort(SubIndices.begin(), SubIndices.end());
+    unsigned NewSubIdx = 0;
+    if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
+                                    NewSubIdx)) {
+      bool Proceed = true;
+      if (NewSubIdx)
+        for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+               RE = MRI->reg_end(); RI != RE; ) {
+          MachineOperand &MO = RI.getOperand();
+          ++RI;
+          // FIXME: If the sub-registers do not combine to the whole
+          // super-register, i.e. NewSubIdx != 0, and any of the use has a
+          // sub-register index, then abort the coalescing attempt.
+          if (MO.getSubReg()) {
+            Proceed = false;
+            break;
+          }
+          MO.setReg(DstReg);
+          MO.setSubReg(NewSubIdx);
+        }
+      if (Proceed)
+        for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+               RE = MRI->reg_end(); RI != RE; ) {
+          MachineOperand &MO = RI.getOperand();
+          ++RI;
+          MO.setReg(DstReg);
+          if (NewSubIdx)
+            MO.setSubReg(NewSubIdx);
+        }
+    }
+  }
+}
+
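
TRI->canCombinedSubRegIndex above is a target hook deciding whether the sorted sub-register indices collapse into a single index. For intuition only, a toy stand-in: per the FIXME, an output index of 0 means the indices cover the whole super-register, while a non-zero index names a proper sub-register. The values 5 and 6 follow the ARM example in the doc comment (the two D halves of a Q register); kQSubRegIdx is a hypothetical placeholder, not a real ARM sub-register index.

// Toy stand-in (assumption, not the real TargetRegisterInfo hook).
#include <vector>

static const unsigned kQSubRegIdx = 1; // hypothetical Q sub-register index

static bool toyCanCombineSubRegIndices(const std::vector<unsigned> &SortedIdx,
                                       bool SrcIsQReg, unsigned &NewSubIdx) {
  if (SortedIdx.size() == 2 && SortedIdx[0] == 5 && SortedIdx[1] == 6) {
    // D subregs 5 and 6 form one Q register: either the whole source
    // register (NewSubIdx = 0) or a Q sub-register of a larger one.
    NewSubIdx = SrcIsQReg ? 0 : kQSubRegIdx;
    return true;
  }
  return false; // no single index covers this combination
}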
@@ -1221,50 +1293,15 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
 
     for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
       unsigned SrcReg = MI->getOperand(i).getReg();
-      unsigned SrcIdx = MI->getOperand(i+1).getImm();
-      UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI);
+      unsigned SubIdx = MI->getOperand(i+1).getImm();
+      UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
     }
 
     DEBUG(dbgs() << "Eliminated: " << *MI);
     MI->eraseFromParent();
 
     // Try coalescing some EXTRACT_SUBREG instructions.
-    Seen.clear();
-    for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) {
-      unsigned SrcReg = RealSrcs[i];
-      if (!Seen.insert(SrcReg))
-        continue;
-
-      // If there are no other uses than extract_subreg which feed into
-      // the reg_sequence, then we might be able to coalesce them.
-      bool CanCoalesce = true;
-      SmallVector<unsigned, 4> SubIndices;
-      for (MachineRegisterInfo::use_nodbg_iterator
-             UI = MRI->use_nodbg_begin(SrcReg),
-             UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-        MachineInstr *UseMI = &*UI;
-        if (!UseMI->isExtractSubreg() ||
-            UseMI->getOperand(0).getReg() != DstReg) {
-          CanCoalesce = false;
-          break;
-        }
-        SubIndices.push_back(UseMI->getOperand(2).getImm());
-      }
-
-      if (!CanCoalesce)
-        continue;
-
-      // %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
-      // %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
-      // %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
-      // Since D subregs 5, 6 can combine to a Q register, we can coalesce
-      // reg1026 to reg1029.
-      std::sort(SubIndices.begin(), SubIndices.end());
-      unsigned NewSubIdx = 0;
-      if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
-                                      NewSubIdx))
-        UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI);
-    }
+    CoalesceExtSubRegs(RealSrcs, DstReg);
   }
 
   RegSequences.clear();
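
For reference, the operand layout the "i += 2" loop above walks: operand 0 of a REG_SEQUENCE defines DstReg, and each source follows as a (register, sub-register index) pair, so operands 1, 3, 5, ... are registers and 2, 4, 6, ... are their indices. A sketch with illustrative stand-in types (not LLVM's):

// Illustrative stand-in types, not LLVM's MachineInstr representation.
#include <cstdio>
#include <utility>
#include <vector>

// %DstReg = REG_SEQUENCE %Src0, SubIdx0, %Src1, SubIdx1, ...
struct ToyRegSequence {
  unsigned DstReg;
  std::vector<std::pair<unsigned, unsigned>> Srcs; // (SrcReg, SubIdx) pairs
};

static void toyEliminate(const ToyRegSequence &MI) {
  for (const auto &P : MI.Srcs)
    // Mirrors UpdateRegSequenceSrcs: every use of SrcReg becomes a
    // DstReg:SubIdx sub-register reference.
    std::printf("rewrite %%reg%u -> %%reg%u:%u\n",
                P.first, MI.DstReg, P.second);
}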
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -3,6 +3,7 @@
 
 %struct.int16x8_t = type { <8 x i16> }
 %struct.int32x4_t = type { <4 x i32> }
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
 %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
 %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
@@ -149,12 +150,51 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
   ret <8 x i16> %tmp5
 }
 
+define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
+; CHECK: t6:
+; CHECK: vldr.64
+; CHECK: vmov d1, d0
+; CHECK-NEXT: vld2.8 {d0[1], d1[1]}
+  %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
+  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
+  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
+  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
+  %tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1]
+  ret <8 x i8> %tmp5
+}
+
+define arm_apcscc void @t7(i32* %iptr, i32* %optr) nounwind {
+entry:
+; CHECK: t7:
+; CHECK: vld2.32
+; CHECK: vst2.32
+; CHECK: vld1.32 {d0, d1},
+; CHECK: vmov q1, q0
+; CHECK-NOT: vmov
+; CHECK: vuzp.32 q0, q1
+; CHECK: vst1.32
+  %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
+  %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+  %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1]
+  %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1]
+  %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2]
+  tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60)
+  %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1]
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1]
+  tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4)
+  ret void
+}
+
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+
 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
 
 declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
 
 declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+
 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
 
 declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
@@ -163,6 +203,8 @@ declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
 
 declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
 
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+
 declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
 
 declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind