mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-30 16:17:05 +00:00 
			
		
		
		
	Careful with reg_sequence coalescing to not to overwrite sub-register indices.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103971 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -128,6 +128,8 @@ namespace { | ||||
|     void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, | ||||
|                      SmallPtrSet<MachineInstr*, 8> &Processed); | ||||
|  | ||||
|     void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); | ||||
|  | ||||
|     /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part | ||||
|     /// of the de-ssa process. This replaces sources of REG_SEQUENCE as | ||||
|     /// sub-register references of the register defined by REG_SEQUENCE. | ||||
| @@ -1132,7 +1134,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { | ||||
| } | ||||
|  | ||||
| static void UpdateRegSequenceSrcs(unsigned SrcReg, | ||||
|                                   unsigned DstReg, unsigned SrcIdx, | ||||
|                                   unsigned DstReg, unsigned SubIdx, | ||||
|                                   MachineRegisterInfo *MRI) { | ||||
|   for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), | ||||
|          RE = MRI->reg_end(); RI != RE; ) { | ||||
| @@ -1140,7 +1142,77 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg, | ||||
|     ++RI; | ||||
|     MO.setReg(DstReg); | ||||
|     assert(MO.getSubReg() == 0); | ||||
|     MO.setSubReg(SrcIdx); | ||||
|     MO.setSubReg(SubIdx); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are | ||||
| /// EXTRACT_SUBREG from the same register and to the same virtual register | ||||
| /// with different sub-register indices, attempt to combine the | ||||
| /// EXTRACT_SUBREGs and pre-coalesce them. e.g. | ||||
| /// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 | ||||
| /// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 | ||||
| /// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 | ||||
| /// Since D subregs 5, 6 can combine to a Q register, we can coalesce | ||||
| /// reg1026 to reg1029. | ||||
| void | ||||
| TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, | ||||
|                                               unsigned DstReg) { | ||||
|   SmallSet<unsigned, 4> Seen; | ||||
|   for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { | ||||
|     unsigned SrcReg = Srcs[i]; | ||||
|     if (!Seen.insert(SrcReg)) | ||||
|       continue; | ||||
|  | ||||
|     // If there are no other uses than extract_subreg which feed into | ||||
|     // the reg_sequence, then we might be able to coalesce them. | ||||
|     bool CanCoalesce = true; | ||||
|     SmallVector<unsigned, 4> SubIndices; | ||||
|     for (MachineRegisterInfo::use_nodbg_iterator | ||||
|            UI = MRI->use_nodbg_begin(SrcReg), | ||||
|            UE = MRI->use_nodbg_end(); UI != UE; ++UI) { | ||||
|       MachineInstr *UseMI = &*UI; | ||||
|       if (!UseMI->isExtractSubreg() || | ||||
|           UseMI->getOperand(0).getReg() != DstReg) { | ||||
|         CanCoalesce = false; | ||||
|         break; | ||||
|       } | ||||
|       SubIndices.push_back(UseMI->getOperand(2).getImm()); | ||||
|     } | ||||
|  | ||||
|     if (!CanCoalesce || SubIndices.size() < 2) | ||||
|       continue; | ||||
|  | ||||
|     std::sort(SubIndices.begin(), SubIndices.end()); | ||||
|     unsigned NewSubIdx = 0; | ||||
|     if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, | ||||
|                                     NewSubIdx)) { | ||||
|       bool Proceed = true; | ||||
|       if (NewSubIdx) | ||||
|         for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), | ||||
|                RE = MRI->reg_end(); RI != RE; ) { | ||||
|           MachineOperand &MO = RI.getOperand(); | ||||
|           ++RI; | ||||
|           // FIXME: If the sub-registers do not combine to the whole | ||||
|           // super-register, i.e. NewSubIdx != 0, and any of the use has a | ||||
|           // sub-register index, then abort the coalescing attempt. | ||||
|           if (MO.getSubReg()) { | ||||
|             Proceed = false; | ||||
|             break; | ||||
|           } | ||||
|           MO.setReg(DstReg); | ||||
|           MO.setSubReg(NewSubIdx); | ||||
|         } | ||||
|       if (Proceed) | ||||
|         for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), | ||||
|                RE = MRI->reg_end(); RI != RE; ) { | ||||
|           MachineOperand &MO = RI.getOperand(); | ||||
|           ++RI; | ||||
|           MO.setReg(DstReg); | ||||
|           if (NewSubIdx) | ||||
|             MO.setSubReg(NewSubIdx); | ||||
|         } | ||||
|       } | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -1221,50 +1293,15 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { | ||||
|  | ||||
|     for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { | ||||
|       unsigned SrcReg = MI->getOperand(i).getReg(); | ||||
|       unsigned SrcIdx = MI->getOperand(i+1).getImm(); | ||||
|       UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI); | ||||
|       unsigned SubIdx = MI->getOperand(i+1).getImm(); | ||||
|       UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI); | ||||
|     } | ||||
|  | ||||
|     DEBUG(dbgs() << "Eliminated: " << *MI); | ||||
|     MI->eraseFromParent(); | ||||
|  | ||||
|     // Try coalescing some EXTRACT_SUBREG instructions. | ||||
|     Seen.clear(); | ||||
|     for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) { | ||||
|       unsigned SrcReg = RealSrcs[i]; | ||||
|       if (!Seen.insert(SrcReg)) | ||||
|         continue; | ||||
|  | ||||
|       // If there are no other uses than extract_subreg which feed into | ||||
|       // the reg_sequence, then we might be able to coalesce them. | ||||
|       bool CanCoalesce = true; | ||||
|       SmallVector<unsigned, 4> SubIndices; | ||||
|       for (MachineRegisterInfo::use_nodbg_iterator | ||||
|              UI = MRI->use_nodbg_begin(SrcReg), | ||||
|              UE = MRI->use_nodbg_end(); UI != UE; ++UI) { | ||||
|         MachineInstr *UseMI = &*UI; | ||||
|         if (!UseMI->isExtractSubreg() || | ||||
|             UseMI->getOperand(0).getReg() != DstReg) { | ||||
|           CanCoalesce = false; | ||||
|           break; | ||||
|         } | ||||
|         SubIndices.push_back(UseMI->getOperand(2).getImm()); | ||||
|       } | ||||
|  | ||||
|       if (!CanCoalesce) | ||||
|         continue; | ||||
|  | ||||
|       // %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 | ||||
|       // %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 | ||||
|       // %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 | ||||
|       // Since D subregs 5, 6 can combine to a Q register, we can coalesce | ||||
|       // reg1026 to reg1029. | ||||
|       std::sort(SubIndices.begin(), SubIndices.end()); | ||||
|       unsigned NewSubIdx = 0; | ||||
|       if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, | ||||
|                                       NewSubIdx)) | ||||
|         UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI); | ||||
|     } | ||||
|     CoalesceExtSubRegs(RealSrcs, DstReg); | ||||
|   } | ||||
|  | ||||
|   RegSequences.clear(); | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
|  | ||||
| %struct.int16x8_t = type { <8 x i16> } | ||||
| %struct.int32x4_t = type { <4 x i32> } | ||||
| %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } | ||||
| %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> } | ||||
| %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } | ||||
| %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } | ||||
| @@ -149,12 +150,51 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { | ||||
|   ret <8 x i16> %tmp5 | ||||
| } | ||||
|  | ||||
| define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { | ||||
| ; CHECK:        t6: | ||||
| ; CHECK:        vldr.64 | ||||
| ; CHECK:        vmov d1, d0 | ||||
| ; CHECK-NEXT:   vld2.8 {d0[1], d1[1]} | ||||
|   %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2] | ||||
|   %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] | ||||
|   %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] | ||||
|   %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] | ||||
|   %tmp5 = add <8 x i8> %tmp3, %tmp4               ; <<8 x i8>> [#uses=1] | ||||
|   ret <8 x i8> %tmp5 | ||||
| } | ||||
|  | ||||
| define arm_apcscc void @t7(i32* %iptr, i32* %optr) nounwind { | ||||
| entry: | ||||
| ; CHECK:        t7: | ||||
| ; CHECK:        vld2.32 | ||||
| ; CHECK:        vst2.32 | ||||
| ; CHECK:        vld1.32 {d0, d1}, | ||||
| ; CHECK:        vmov q1, q0 | ||||
| ; CHECK-NOT:    vmov | ||||
| ; CHECK:        vuzp.32 q0, q1 | ||||
| ; CHECK:        vst1.32 | ||||
|   %0 = bitcast i32* %iptr to i8*                  ; <i8*> [#uses=2] | ||||
|   %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2] | ||||
|   %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1] | ||||
|   %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1] | ||||
|   %2 = bitcast i32* %optr to i8*                  ; <i8*> [#uses=2] | ||||
|   tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60) | ||||
|   %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1] | ||||
|   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1] | ||||
|   tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4) | ||||
|   ret void | ||||
| } | ||||
|  | ||||
| declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly | ||||
|  | ||||
| declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly | ||||
|  | ||||
| declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone | ||||
|  | ||||
| declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone | ||||
|  | ||||
| declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind | ||||
|  | ||||
| declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind | ||||
|  | ||||
| declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind | ||||
| @@ -163,6 +203,8 @@ declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonl | ||||
|  | ||||
| declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly | ||||
|  | ||||
| declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly | ||||
|  | ||||
| declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly | ||||
|  | ||||
| declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind | ||||
|   | ||||
		Reference in New Issue
	
	Block a user