Codegen allonesvector better while using AVX: vpcmpeqd + vinsertf128
This also fixes PR10452.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136004 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 51e92e8e41
commit 863bd9d5cf
@@ -3831,21 +3831,25 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
-/// their original type, ensuring they get CSE'd.
+/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
+/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
+/// original type, ensuring they get CSE'd.
 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
   assert((VT.is128BitVector() || VT.is256BitVector())
          && "Expected a 128-bit or 256-bit vector type");
 
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                            Cst, Cst, Cst, Cst);
+
-  SDValue Vec;
   if (VT.is256BitVector()) {
-    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
-  } else
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+    SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+                              Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+    Vec = Insert128BitVector(InsV, Vec,
+                  DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+  }
 
   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
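To make the new lowering concrete: the sequence it aims for is a 128-bit vpcmpeqd to materialize all-ones, then vinsertf128 to replicate that half into a 256-bit register. Below is a minimal, self-contained intrinsics sketch of that idea (illustrative only, not code from this commit; the helper name all_ones_256 is invented), built with AVX enabled, e.g. -mavx:

#include <immintrin.h>
#include <stdio.h>

static __m256i all_ones_256(void) {
  __m128i zero = _mm_setzero_si128();
  __m128i ones128 = _mm_cmpeq_epi32(zero, zero);   // vpcmpeqd x,x -> every bit set
  __m256i lo = _mm256_castsi128_si256(ones128);    // low half = ones, high half undefined
  return _mm256_insertf128_si256(lo, ones128, 1);  // vinsertf128 $1: fill the high half
}

int main(void) {
  unsigned u[8];
  _mm256_storeu_si256((__m256i *)u, all_ones_256());
  for (int i = 0; i < 8; ++i)
    printf("%08x\n", u[i]);                        // expect ffffffff in every lane
  return 0;
}

AVX (v1) has no 256-bit integer compare, so the vpcmpeqd all-ones idiom only exists at 128 bits; building the wide value as two 128-bit halves is what lets the backend keep using it, which is exactly what the new getOnesVector does with Insert128BitVector.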
@@ -12023,6 +12027,35 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector
+/// so it can be folded inside ANDNP.
+static bool CanFoldXORWithAllOnes(const SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // Match direct AllOnes for 128 and 256-bit vectors
+  if (ISD::isBuildVectorAllOnes(N))
+    return true;
+
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  // Sometimes the operand may come from a insert_subvector building a 256-bit
+  // allones vector
+  SDValue V1 = N->getOperand(0);
+  SDValue V2 = N->getOperand(1);
+
+  if (VT.getSizeInBits() == 256 &&
+      N->getOpcode() == ISD::INSERT_SUBVECTOR &&
+      V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+      ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
+      ISD::isBuildVectorAllOnes(V2.getNode()))
+    return true;
+
+  return false;
+}
+
 static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
@@ -12047,12 +12080,14 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
 
   // Check LHS for vnot
   if (N0.getOpcode() == ISD::XOR &&
-      ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+      //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+      CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
     return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
 
   // Check RHS for vnot
   if (N1.getOpcode() == ISD::XOR &&
-      ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+      //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+      CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
     return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
 
   return SDValue();
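For context, the combine this helper feeds rests on a simple bitwise identity: (X ^ all-ones) & Y is (~X) & Y, which x86 can do with one andnot-style instruction (X86ISD::ANDNP). A small standalone sketch of that identity and of the 256-bit andnot form (illustrative only, not code from this commit):

#include <immintrin.h>
#include <assert.h>
#include <stdint.h>

int main(void) {
  // The scalar identity PerformAndCombine exploits, checked per 32-bit lane.
  uint32_t x = 0x12345678u, y = 0x9abcdef0u;
  assert(((x ^ 0xffffffffu) & y) == (~x & y));

  // The 256-bit form: on AVX this maps onto a single vandnps.
  __m256 vx = _mm256_set1_ps(1.0f), vy = _mm256_set1_ps(2.0f);
  __m256 r = _mm256_andnot_ps(vx, vy);             // bitwise (~vx) & vy
  (void)r;
  return 0;
}

The point of CanFoldXORWithAllOnes is that, after the getOnesVector change above, a 256-bit all-ones operand may reach this combine as an INSERT_SUBVECTOR chain rather than a plain BUILD_VECTOR, and the fold should still fire.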
@@ -2450,6 +2450,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   case X86::AVX_SET0PS:
   case X86::AVX_SET0PD:
   case X86::AVX_SET0PI:
+  case X86::AVX_SETALLONES:
     Alignment = 16;
     break;
   case X86::FsFLD0SD:
@@ -2494,6 +2495,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   case X86::AVX_SET0PI:
   case X86::AVX_SET0PSY:
   case X86::AVX_SET0PDY:
+  case X86::AVX_SETALLONES:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS:
   case X86::VFsFLD0SD:
@@ -2531,9 +2533,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
-    const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
-                        Constant::getAllOnesValue(Ty) :
-                        Constant::getNullValue(Ty);
+
+    bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+    const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
+                                    Constant::getNullValue(Ty);
     unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
 
     // Create operands to load from the constant pool entry.
@@ -3143,11 +3143,17 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
 // Alias instructions that map zero vector to pxor / xorp* for sse.
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, it does not expand the instructions below like
+// X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
-  // FIXME: Change encoding to pseudo.
   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+  def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+                           [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
 
 //===---------------------------------------------------------------------===//
 // SSE3 - Conversion Instructions
@@ -381,6 +381,7 @@ ReSimplify:
   case X86::AVX_SET0PD:  LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
   case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
   case X86::AVX_SET0PI:  LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+  case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
 
   case X86::MOV16r0:
     LowerSubReg32_Op0(OutMI, X86::MOV32r0);   // MOV16r0 -> MOV32r0
@@ -12,3 +12,15 @@ entry:
   store <4 x double> zeroinitializer, <4 x double>* @y, align 32
   ret void
 }
+
+; CHECK: vpcmpeqd
+; CHECK: vinsertf128 $1
+define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
+allocas:
+  %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>*
+  store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
+0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
+0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <8 x
+float>* %ptr2vec615, align 32
+  ret void
+}