mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Merge AVX_SET0PSY/AVX_SET0PDY/AVX2_SET0 into a single post-RA pseudo.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162738 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2f820a5d64
commit
13897fb263
@ -3429,6 +3429,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
|
||||
case X86::FsFLD0SS:
|
||||
case X86::FsFLD0SD:
|
||||
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
|
||||
case X86::AVX_SET0:
|
||||
assert(HasAVX && "AVX not supported");
|
||||
return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
|
||||
case X86::TEST8ri_NOREX:
|
||||
MI->setDesc(get(X86::TEST8ri));
|
||||
return true;
|
||||
@ -3780,10 +3783,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
|
||||
else
|
||||
switch (LoadMI->getOpcode()) {
|
||||
case X86::AVX_SET0PSY:
|
||||
case X86::AVX_SET0PDY:
|
||||
case X86::AVX2_SETALLONES:
|
||||
case X86::AVX2_SET0:
|
||||
case X86::AVX_SET0:
|
||||
Alignment = 32;
|
||||
break;
|
||||
case X86::V_SET0:
|
||||
@ -3824,11 +3825,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
switch (LoadMI->getOpcode()) {
|
||||
case X86::V_SET0:
|
||||
case X86::V_SETALLONES:
|
||||
case X86::AVX_SET0PSY:
|
||||
case X86::AVX_SET0PDY:
|
||||
case X86::AVX_SETALLONES:
|
||||
case X86::AVX2_SETALLONES:
|
||||
case X86::AVX2_SET0:
|
||||
case X86::AVX_SET0:
|
||||
case X86::FsFLD0SD:
|
||||
case X86::FsFLD0SS: {
|
||||
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
|
||||
@ -3860,9 +3859,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
Ty = Type::getFloatTy(MF.getFunction()->getContext());
|
||||
else if (Opc == X86::FsFLD0SD)
|
||||
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
|
||||
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
|
||||
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
|
||||
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
|
||||
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
|
||||
else
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
|
||||
|
@ -382,12 +382,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
// We set canFoldAsLoad because this can be converted to a constant-pool
|
||||
// load of an all-zeros value if folding it would be beneficial.
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isPseudo = 1, neverHasSideEffects = 1 in {
|
||||
isPseudo = 1 in {
|
||||
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
|
||||
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
|
||||
}
|
||||
|
||||
def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
|
||||
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
|
||||
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
|
||||
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
|
||||
@ -395,30 +394,24 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
|
||||
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
|
||||
|
||||
|
||||
// The same as done above but for AVX. The 256-bit ISA does not support PI,
|
||||
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
|
||||
// and doesn't need it because on sandy bridge the register is set to zero
|
||||
// at the rename stage without using any execution unit, so SET0PSY
|
||||
// and SET0PDY can be used for vector int instructions without penalty
|
||||
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
|
||||
// JIT implementation, it does not expand the instructions below like
|
||||
// X86MCInstLower does.
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isCodeGenOnly = 1 in {
|
||||
let Predicates = [HasAVX] in {
|
||||
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
|
||||
[(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
|
||||
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
|
||||
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
|
||||
}
|
||||
let Predicates = [HasAVX2] in
|
||||
def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
|
||||
[(set VR256:$dst, (v4i64 immAllZerosV))]>, VEX_4V;
|
||||
isPseudo = 1, Predicates = [HasAVX] in {
|
||||
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
|
||||
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
|
||||
def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
|
||||
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
|
||||
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
|
||||
}
|
||||
|
||||
// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
|
||||
|
@ -378,11 +378,8 @@ ReSimplify:
|
||||
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
|
||||
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
|
||||
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
|
||||
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
|
||||
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
|
||||
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
|
||||
case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
|
||||
case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
|
||||
|
||||
case X86::MOV16r0:
|
||||
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
|
||||
|
@ -5,7 +5,7 @@
|
||||
; It's hard to test for the ISEL condition because CodeGen optimizes
|
||||
; away the bugpointed code. Just ensure the basics are still there.
|
||||
;CHECK: func:
|
||||
;CHECK: vpxor
|
||||
;CHECK: vxorps
|
||||
;CHECK: vinsertf128
|
||||
;CHECK: vpshufd
|
||||
;CHECK: vpshufd
|
||||
|
Loading…
Reference in New Issue
Block a user