mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 22:23:10 +00:00
[AVX] Optimize x86 VSELECT instructions using SimplifyDemandedBits.
We know that the blend instructions only use the MSB, so if the mask is sign-extended we can convert the sign extension into a SHL instruction. This is a common pattern because the type-legalizer sign-extends the i1 type that LLVM IR uses for the condition. This commit adds a new optimization in SimplifyDemandedBits for SIGN_EXTEND_INREG -> SHL. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148225 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -1608,23 +1608,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
||||
}
|
||||
break;
|
||||
case ISD::SIGN_EXTEND_INREG: {
|
||||
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
|
||||
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
|
||||
|
||||
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
|
||||
// If we only care about the highest bit, don't bother shifting right.
|
||||
if (MsbMask == DemandedMask) {
|
||||
unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
|
||||
SDValue InOp = Op.getOperand(0);
|
||||
EVT InVT = Op.getOperand(0).getValueType();
|
||||
EVT ShTy = getShiftAmountTy(InVT);
|
||||
// In this code we may handle vector types. We can't use the
|
||||
// getShiftAmountTy API because it only works on scalars.
|
||||
// We use the shift value type because we know that it's an integer
|
||||
// with enough bits.
|
||||
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt,
|
||||
Op.getValueType());
|
||||
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
|
||||
Op.getValueType(), InOp, ShiftAmt));
|
||||
}
|
||||
|
||||
// Sign extension. Compute the demanded bits in the result that are not
|
||||
// present in the input.
|
||||
APInt NewBits =
|
||||
APInt::getHighBitsSet(BitWidth,
|
||||
BitWidth - EVT.getScalarType().getSizeInBits());
|
||||
BitWidth - ExVT.getScalarType().getSizeInBits());
|
||||
|
||||
// If none of the extended bits are demanded, eliminate the sextinreg.
|
||||
if ((NewBits & NewMask) == 0)
|
||||
return TLO.CombineTo(Op, Op.getOperand(0));
|
||||
|
||||
APInt InSignBit =
|
||||
APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
|
||||
APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
|
||||
APInt InputDemandedBits =
|
||||
APInt::getLowBitsSet(BitWidth,
|
||||
EVT.getScalarType().getSizeInBits()) &
|
||||
ExVT.getScalarType().getSizeInBits()) &
|
||||
NewMask;
|
||||
|
||||
// Since the sign extended bits are demanded, we know that the sign
|
||||
@@ -1642,7 +1659,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
||||
// If the input sign bit is known zero, convert this into a zero extension.
|
||||
if (KnownZero.intersects(InSignBit))
|
||||
return TLO.CombineTo(Op,
|
||||
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
|
||||
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
|
||||
|
||||
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
|
||||
KnownOne |= NewBits;
|
||||
|
||||
Reference in New Issue
Block a user