mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-15 06:29:05 +00:00
AArch64/ARM64: port BSL logic from AArch64 & enable test.
I enhanced it a little in the process. The decision shouldn't really be based on whether a BUILD_VECTOR is a splat: any set of constants will do the job provided they're related in the correct way. Also, the BUILD_VECTOR could be any operand of the incoming AND nodes, so it's best to check for all 4 possibilities rather than assuming it'll be the RHS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206569 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -635,6 +635,7 @@ const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl";
|
case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl";
|
||||||
case ARM64ISD::BICi: return "ARM64ISD::BICi";
|
case ARM64ISD::BICi: return "ARM64ISD::BICi";
|
||||||
case ARM64ISD::ORRi: return "ARM64ISD::ORRi";
|
case ARM64ISD::ORRi: return "ARM64ISD::ORRi";
|
||||||
|
case ARM64ISD::BSL: return "ARM64ISD::BSL";
|
||||||
case ARM64ISD::NEG: return "ARM64ISD::NEG";
|
case ARM64ISD::NEG: return "ARM64ISD::NEG";
|
||||||
case ARM64ISD::EXTR: return "ARM64ISD::EXTR";
|
case ARM64ISD::EXTR: return "ARM64ISD::EXTR";
|
||||||
case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1";
|
case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1";
|
||||||
@@ -5924,6 +5925,53 @@ static SDValue tryCombineToEXTR(SDNode *N,
|
|||||||
DAG.getConstant(ShiftRHS, MVT::i64));
|
DAG.getConstant(ShiftRHS, MVT::i64));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SDValue tryCombineToBSL(SDNode *N,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI) {
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
|
SDLoc DL(N);
|
||||||
|
|
||||||
|
if (!VT.isVector())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue N0 = N->getOperand(0);
|
||||||
|
if (N0.getOpcode() != ISD::AND)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue N1 = N->getOperand(1);
|
||||||
|
if (N1.getOpcode() != ISD::AND)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// We only have to look for constant vectors here since the general, variable
|
||||||
|
// case can be handled in TableGen.
|
||||||
|
unsigned Bits = VT.getVectorElementType().getSizeInBits();
|
||||||
|
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
|
||||||
|
for (int i = 1; i >= 0; --i)
|
||||||
|
for (int j = 1; j >= 0; --j) {
|
||||||
|
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
|
||||||
|
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
|
||||||
|
if (!BVN0 || !BVN1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bool FoundMatch = true;
|
||||||
|
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
|
||||||
|
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
|
||||||
|
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
|
||||||
|
if (!CN0 || !CN1 ||
|
||||||
|
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
|
||||||
|
FoundMatch = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FoundMatch)
|
||||||
|
return DAG.getNode(ARM64ISD::BSL, DL, VT, SDValue(BVN0, 0),
|
||||||
|
N0->getOperand(1 - i), N1->getOperand(1 - j));
|
||||||
|
}
|
||||||
|
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
||||||
const ARM64Subtarget *Subtarget) {
|
const ARM64Subtarget *Subtarget) {
|
||||||
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
|
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
|
||||||
@@ -5939,6 +5987,10 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
|||||||
if (Res.getNode())
|
if (Res.getNode())
|
||||||
return Res;
|
return Res;
|
||||||
|
|
||||||
|
Res = tryCombineToBSL(N, DCI);
|
||||||
|
if (Res.getNode())
|
||||||
|
return Res;
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -87,6 +87,10 @@ enum {
|
|||||||
BICi,
|
BICi,
|
||||||
ORRi,
|
ORRi,
|
||||||
|
|
||||||
|
// Vector bit select: similar to ISD::VSELECT but not all bits within an
|
||||||
|
// element must be identical.
|
||||||
|
BSL,
|
||||||
|
|
||||||
// Vector arithmetic negation
|
// Vector arithmetic negation
|
||||||
NEG,
|
NEG,
|
||||||
|
|
||||||
|
@@ -173,6 +173,7 @@ def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>;
|
|||||||
|
|
||||||
def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>;
|
def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>;
|
||||||
def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>;
|
def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>;
|
||||||
|
def ARM64bsl: SDNode<"ARM64ISD::BSL", SDT_ARM64trivec>;
|
||||||
|
|
||||||
def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>;
|
def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>;
|
||||||
def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>;
|
def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>;
|
||||||
@@ -2371,6 +2372,24 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
|
|||||||
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
|
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
|
||||||
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
|
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
|
||||||
|
|
||||||
|
// Instruction-selection patterns for the ARM64bsl node. Each pattern takes
// three vector register operands ($Rd, $Rn, $Rm) and forwards them unchanged
// to a BSL instruction. The same byte-typed instruction is used for every
// element type of a given register width.
//
// 64-bit vector types (v8i8, v4i16, v2i32, v1i64) all select BSLv8i8.
def : Pat<(ARM64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
|
||||||
|
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
|
||||||
|
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
|
||||||
|
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
|
||||||
|
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
|
||||||
|
|
||||||
|
// 128-bit vector types (v16i8, v8i16, v4i32, v2i64) all select BSLv16i8.
def : Pat<(ARM64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
|
||||||
|
(BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
|
||||||
|
(BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
|
||||||
|
(BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
|
||||||
|
def : Pat<(ARM64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
|
||||||
|
(BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
|
||||||
|
|
||||||
// FIXME: the .16b and .8b variants should be emitted by the
|
// FIXME: the .16b and .8b variants should be emitted by the
|
||||||
// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes
|
// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes
|
||||||
// in aliases yet though.
|
// in aliases yet though.
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,6 @@
|
|||||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||||
|
; arm64 has no equivalent vbsl intrinsic, always using the and/or IR. The final
|
||||||
|
; two tests are duplicated by ARM64's vselect.ll test.
|
||||||
|
|
||||||
declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user