mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
- Two-address pass should not assume unfolding is always successful.
- X86 unfolding should check whether the instruction being unfolded has memoperands. If there are no memoperands, then it must assume conservative alignment. If this would introduce an expensive SSE unaligned load / store, then unfoldMemoryOperand etc. should not unfold the instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107509 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
541481f34c
commit
98ec91ea80
@ -926,14 +926,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
|
|||||||
UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
|
UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
|
||||||
unsigned Reg = MRI->createVirtualRegister(RC);
|
unsigned Reg = MRI->createVirtualRegister(RC);
|
||||||
SmallVector<MachineInstr *, 2> NewMIs;
|
SmallVector<MachineInstr *, 2> NewMIs;
|
||||||
bool Success =
|
if (!TII->unfoldMemoryOperand(MF, mi, Reg,
|
||||||
TII->unfoldMemoryOperand(MF, mi, Reg,
|
|
||||||
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
|
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
|
||||||
NewMIs);
|
NewMIs)) {
|
||||||
(void)Success;
|
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
|
||||||
assert(Success &&
|
return false;
|
||||||
"unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
|
}
|
||||||
"succeeded!");
|
|
||||||
assert(NewMIs.size() == 2 &&
|
assert(NewMIs.size() == 2 &&
|
||||||
"Unfolded a load into multiple instructions!");
|
"Unfolded a load into multiple instructions!");
|
||||||
// The load was previously folded, so this is the only use.
|
// The load was previously folded, so this is the only use.
|
||||||
|
@ -2159,7 +2159,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
|
|||||||
MachineInstr::mmo_iterator MMOBegin,
|
MachineInstr::mmo_iterator MMOBegin,
|
||||||
MachineInstr::mmo_iterator MMOEnd,
|
MachineInstr::mmo_iterator MMOEnd,
|
||||||
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
||||||
bool isAligned = (*MMOBegin)->getAlignment() >= 16;
|
bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16;
|
||||||
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
|
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
|
||||||
DebugLoc DL;
|
DebugLoc DL;
|
||||||
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
|
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
|
||||||
@ -2189,7 +2189,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
|
|||||||
MachineInstr::mmo_iterator MMOBegin,
|
MachineInstr::mmo_iterator MMOBegin,
|
||||||
MachineInstr::mmo_iterator MMOEnd,
|
MachineInstr::mmo_iterator MMOEnd,
|
||||||
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
||||||
bool isAligned = (*MMOBegin)->getAlignment() >= 16;
|
bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16;
|
||||||
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
|
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
|
||||||
DebugLoc DL;
|
DebugLoc DL;
|
||||||
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
|
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
|
||||||
@ -2693,6 +2693,13 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
|||||||
const TargetInstrDesc &TID = get(Opc);
|
const TargetInstrDesc &TID = get(Opc);
|
||||||
const TargetOperandInfo &TOI = TID.OpInfo[Index];
|
const TargetOperandInfo &TOI = TID.OpInfo[Index];
|
||||||
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
|
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
|
||||||
|
if (!MI->hasOneMemOperand() &&
|
||||||
|
RC == &X86::VR128RegClass &&
|
||||||
|
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
|
||||||
|
// Without memoperands, loadRegFromAddr and storeRegToAddr will
|
||||||
|
// conservatively assume the address is unaligned. That's bad for
|
||||||
|
// performance.
|
||||||
|
return false;
|
||||||
SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
|
SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
|
||||||
SmallVector<MachineOperand,2> BeforeOps;
|
SmallVector<MachineOperand,2> BeforeOps;
|
||||||
SmallVector<MachineOperand,2> AfterOps;
|
SmallVector<MachineOperand,2> AfterOps;
|
||||||
@ -2834,7 +2841,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|||||||
MachineInstr::mmo_iterator> MMOs =
|
MachineInstr::mmo_iterator> MMOs =
|
||||||
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
||||||
cast<MachineSDNode>(N)->memoperands_end());
|
cast<MachineSDNode>(N)->memoperands_end());
|
||||||
bool isAligned = (*MMOs.first)->getAlignment() >= 16;
|
if (!(*MMOs.first) &&
|
||||||
|
RC == &X86::VR128RegClass &&
|
||||||
|
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
|
||||||
|
// Do not introduce a slow unaligned load.
|
||||||
|
return false;
|
||||||
|
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
|
||||||
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
|
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
|
||||||
VT, MVT::Other, &AddrOps[0], AddrOps.size());
|
VT, MVT::Other, &AddrOps[0], AddrOps.size());
|
||||||
NewNodes.push_back(Load);
|
NewNodes.push_back(Load);
|
||||||
@ -2871,7 +2883,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|||||||
MachineInstr::mmo_iterator> MMOs =
|
MachineInstr::mmo_iterator> MMOs =
|
||||||
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
||||||
cast<MachineSDNode>(N)->memoperands_end());
|
cast<MachineSDNode>(N)->memoperands_end());
|
||||||
bool isAligned = (*MMOs.first)->getAlignment() >= 16;
|
if (!(*MMOs.first) &&
|
||||||
|
RC == &X86::VR128RegClass &&
|
||||||
|
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
|
||||||
|
// Do not introduce a slow unaligned store.
|
||||||
|
return false;
|
||||||
|
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
|
||||||
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
|
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
|
||||||
isAligned, TM),
|
isAligned, TM),
|
||||||
dl, MVT::Other,
|
dl, MVT::Other,
|
||||||
|
99
test/CodeGen/X86/2010-07-02-UnfoldBug.ll
Normal file
99
test/CodeGen/X86/2010-07-02-UnfoldBug.ll
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin
|
||||||
|
; rdar://8154265
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp {
|
||||||
|
entry:
|
||||||
|
br i1 undef, label %bb2.thread, label %bb2
|
||||||
|
|
||||||
|
bb2.thread: ; preds = %entry
|
||||||
|
br i1 undef, label %bb41, label %bb10.preheader
|
||||||
|
|
||||||
|
bb2: ; preds = %entry
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb10.preheader: ; preds = %bb2.thread
|
||||||
|
br i1 undef, label %bb9, label %bb12
|
||||||
|
|
||||||
|
bb9: ; preds = %bb9, %bb10.preheader
|
||||||
|
br i1 undef, label %bb9, label %bb12
|
||||||
|
|
||||||
|
bb12: ; preds = %bb9, %bb10.preheader
|
||||||
|
br i1 undef, label %bb4.i.i, label %bb3.i.i
|
||||||
|
|
||||||
|
bb3.i.i: ; preds = %bb12
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb4.i.i: ; preds = %bb12
|
||||||
|
br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
|
||||||
|
|
||||||
|
bb8.i.i: ; preds = %bb4.i.i
|
||||||
|
br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i
|
||||||
|
|
||||||
|
bb9.i.i: ; preds = %bb8.i.i
|
||||||
|
br i1 undef, label %bb11.i.i, label %bb10.i.i
|
||||||
|
|
||||||
|
bb10.i.i: ; preds = %bb9.i.i
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb11.i.i: ; preds = %bb9.i.i
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i
|
||||||
|
br i1 undef, label %bb19, label %bb14
|
||||||
|
|
||||||
|
bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
|
||||||
|
br i1 undef, label %bb.i50, label %bb6.i
|
||||||
|
|
||||||
|
bb.i50: ; preds = %bb19
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb6.i: ; preds = %bb19
|
||||||
|
br i1 undef, label %bb28, label %bb.nph106
|
||||||
|
|
||||||
|
bb22: ; preds = %bb24.preheader
|
||||||
|
br i1 undef, label %bb2.i.i, label %bb.i.i49
|
||||||
|
|
||||||
|
bb.i.i49: ; preds = %bb22
|
||||||
|
%0 = load float* undef, align 4 ; <float> [#uses=1]
|
||||||
|
%1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1]
|
||||||
|
%2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1]
|
||||||
|
%3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
|
||||||
|
%4 = extractelement <4 x float> %3, i32 0 ; <float> [#uses=1]
|
||||||
|
store float %4, float* undef, align 4
|
||||||
|
%5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1]
|
||||||
|
%6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
|
||||||
|
%7 = extractelement <4 x float> %6, i32 0 ; <float> [#uses=1]
|
||||||
|
store float %7, float* undef, align 4
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb2.i.i: ; preds = %bb22
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb26.loopexit: ; preds = %bb24.preheader
|
||||||
|
br i1 undef, label %bb28, label %bb24.preheader
|
||||||
|
|
||||||
|
bb.nph106: ; preds = %bb6.i
|
||||||
|
br label %bb24.preheader
|
||||||
|
|
||||||
|
bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit
|
||||||
|
br i1 undef, label %bb22, label %bb26.loopexit
|
||||||
|
|
||||||
|
bb28: ; preds = %bb26.loopexit, %bb6.i
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb41: ; preds = %bb2.thread
|
||||||
|
br i1 undef, label %return, label %bb46
|
||||||
|
|
||||||
|
bb46: ; preds = %bb41
|
||||||
|
ret void
|
||||||
|
|
||||||
|
return: ; preds = %bb41
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user