mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-22 10:33:23 +00:00
Patch to implement UMLAL/SMLAL instructions for the ARM architecture
This patch corrects the definitions of the UMLAL/SMLAL instructions and adds support for matching them in the ARM DAG combiner (Bug 12213). Patch by Yin Ma! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161581 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0b66bd9b07
commit
bcc4c1d2d1
@ -2747,6 +2747,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||||||
dl, MVT::i32, MVT::i32, Ops, 5);
|
dl, MVT::i32, MVT::i32, Ops, 5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case ARMISD::UMLAL:{
|
||||||
|
if (Subtarget->isThumb()) {
|
||||||
|
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||||
|
N->getOperand(3), getAL(CurDAG),
|
||||||
|
CurDAG->getRegister(0, MVT::i32)};
|
||||||
|
return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
|
||||||
|
}else{
|
||||||
|
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||||
|
N->getOperand(3), getAL(CurDAG),
|
||||||
|
CurDAG->getRegister(0, MVT::i32),
|
||||||
|
CurDAG->getRegister(0, MVT::i32) };
|
||||||
|
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
|
||||||
|
ARM::UMLAL : ARM::UMLALv5,
|
||||||
|
dl, MVT::i32, MVT::i32, Ops, 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case ARMISD::SMLAL:{
|
||||||
|
if (Subtarget->isThumb()) {
|
||||||
|
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||||
|
N->getOperand(3), getAL(CurDAG),
|
||||||
|
CurDAG->getRegister(0, MVT::i32)};
|
||||||
|
return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
|
||||||
|
}else{
|
||||||
|
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||||
|
N->getOperand(3), getAL(CurDAG),
|
||||||
|
CurDAG->getRegister(0, MVT::i32),
|
||||||
|
CurDAG->getRegister(0, MVT::i32) };
|
||||||
|
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
|
||||||
|
ARM::SMLAL : ARM::SMLALv5,
|
||||||
|
dl, MVT::i32, MVT::i32, Ops, 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
case ISD::LOAD: {
|
case ISD::LOAD: {
|
||||||
SDNode *ResNode = 0;
|
SDNode *ResNode = 0;
|
||||||
if (Subtarget->isThumb() && Subtarget->hasThumb2())
|
if (Subtarget->isThumb() && Subtarget->hasThumb2())
|
||||||
|
@ -571,6 +571,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ARM and Thumb2 support UMLAL/SMLAL.
|
||||||
|
if (!Subtarget->isThumb1Only())
|
||||||
|
setTargetDAGCombine(ISD::ADDC);
|
||||||
|
|
||||||
|
|
||||||
computeRegisterProperties();
|
computeRegisterProperties();
|
||||||
|
|
||||||
// ARM does not have f32 extending load.
|
// ARM does not have f32 extending load.
|
||||||
@ -989,6 +994,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case ARMISD::VTBL2: return "ARMISD::VTBL2";
|
case ARMISD::VTBL2: return "ARMISD::VTBL2";
|
||||||
case ARMISD::VMULLs: return "ARMISD::VMULLs";
|
case ARMISD::VMULLs: return "ARMISD::VMULLs";
|
||||||
case ARMISD::VMULLu: return "ARMISD::VMULLu";
|
case ARMISD::VMULLu: return "ARMISD::VMULLu";
|
||||||
|
case ARMISD::UMLAL: return "ARMISD::UMLAL";
|
||||||
|
case ARMISD::SMLAL: return "ARMISD::SMLAL";
|
||||||
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
|
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
|
||||||
case ARMISD::FMAX: return "ARMISD::FMAX";
|
case ARMISD::FMAX: return "ARMISD::FMAX";
|
||||||
case ARMISD::FMIN: return "ARMISD::FMIN";
|
case ARMISD::FMIN: return "ARMISD::FMIN";
|
||||||
@ -7127,6 +7134,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
|
|||||||
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
|
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SDValue findMUL_LOHI(SDValue V) {
|
||||||
|
if (V->getOpcode() == ISD::UMUL_LOHI ||
|
||||||
|
V->getOpcode() == ISD::SMUL_LOHI)
|
||||||
|
return V;
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
|
const ARMSubtarget *Subtarget) {
|
||||||
|
|
||||||
|
if (Subtarget->isThumb1Only()) return SDValue();
|
||||||
|
|
||||||
|
// Only perform the checks after legalize when the pattern is available.
|
||||||
|
if (DCI.isBeforeLegalize()) return SDValue();
|
||||||
|
|
||||||
|
// Look for multiply add opportunities.
|
||||||
|
// The pattern is an ISD::UMUL_LOHI (or ISD::SMUL_LOHI) followed by two add nodes, where
|
||||||
|
// each add node consumes a value from the MUL_LOHI node and there is
|
||||||
|
// a glue link from the first add to the second add.
|
||||||
|
// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
|
||||||
|
// a S/UMLAL instruction.
|
||||||
|
// loAdd UMUL_LOHI
|
||||||
|
// \ / :lo \ :hi
|
||||||
|
// \ / \ [no multiline comment]
|
||||||
|
// ADDC | hiAdd
|
||||||
|
// \ :glue / /
|
||||||
|
// \ / /
|
||||||
|
// ADDE
|
||||||
|
//
|
||||||
|
assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
|
||||||
|
SDValue AddcOp0 = AddcNode->getOperand(0);
|
||||||
|
SDValue AddcOp1 = AddcNode->getOperand(1);
|
||||||
|
|
||||||
|
// Bail out if both ADDC operands come from the same node (e.g. both results of one mul_lohi).
|
||||||
|
if (AddcOp0.getNode() == AddcOp1.getNode())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
assert(AddcNode->getNumValues() == 2 &&
|
||||||
|
AddcNode->getValueType(0) == MVT::i32 &&
|
||||||
|
AddcNode->getValueType(1) == MVT::Glue &&
|
||||||
|
"Expect ADDC with two result values: i32, glue");
|
||||||
|
|
||||||
|
// Check that at least one ADDC operand comes from an S/UMUL_LOHI node.
// NOTE(review): only the opcode is tested here, not which result (lo/hi) is consumed.
|
||||||
|
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
|
||||||
|
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
|
||||||
|
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
|
||||||
|
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Look for the glued ADDE.
|
||||||
|
SDNode* AddeNode = AddcNode->getGluedUser();
|
||||||
|
if (AddeNode == NULL)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Make sure it is really an ADDE.
|
||||||
|
if (AddeNode->getOpcode() != ISD::ADDE)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
assert(AddeNode->getNumOperands() == 3 &&
|
||||||
|
AddeNode->getOperand(2).getValueType() == MVT::Glue &&
|
||||||
|
"ADDE node has the wrong inputs");
|
||||||
|
|
||||||
|
// Check for the triangle shape.
|
||||||
|
SDValue AddeOp0 = AddeNode->getOperand(0);
|
||||||
|
SDValue AddeOp1 = AddeNode->getOperand(1);
|
||||||
|
|
||||||
|
// Make sure that the ADDE operands are not coming from the same node.
|
||||||
|
if (AddeOp0.getNode() == AddeOp1.getNode())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Find which of ADDE's two operands, if any, is a MUL_LOHI node.
|
||||||
|
bool IsLeftOperandMUL = false;
|
||||||
|
SDValue MULOp = findMUL_LOHI(AddeOp0);
|
||||||
|
if (MULOp == SDValue())
|
||||||
|
MULOp = findMUL_LOHI(AddeOp1);
|
||||||
|
else
|
||||||
|
IsLeftOperandMUL = true;
|
||||||
|
if (MULOp == SDValue())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Figure out the right opcode.
|
||||||
|
unsigned Opc = MULOp->getOpcode();
|
||||||
|
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
|
||||||
|
|
||||||
|
// Figure out the high and low input values to the MLAL node.
|
||||||
|
SDValue* HiMul = &MULOp;
|
||||||
|
SDValue* HiAdd = NULL;
|
||||||
|
SDValue* LoMul = NULL;
|
||||||
|
SDValue* LowAdd = NULL;
|
||||||
|
|
||||||
|
if (IsLeftOperandMUL)
|
||||||
|
HiAdd = &AddeOp1;
|
||||||
|
else
|
||||||
|
HiAdd = &AddeOp0;
|
||||||
|
|
||||||
|
|
||||||
|
if (AddcOp0->getOpcode() == Opc) {
|
||||||
|
LoMul = &AddcOp0;
|
||||||
|
LowAdd = &AddcOp1;
|
||||||
|
}
|
||||||
|
if (AddcOp1->getOpcode() == Opc) {
|
||||||
|
LoMul = &AddcOp1;
|
||||||
|
LowAdd = &AddcOp0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LoMul == NULL)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
if (LoMul->getNode() != HiMul->getNode())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Create the merged node.
|
||||||
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
|
|
||||||
|
// Build operand list.
|
||||||
|
SmallVector<SDValue, 8> Ops;
|
||||||
|
Ops.push_back(LoMul->getOperand(0));
|
||||||
|
Ops.push_back(LoMul->getOperand(1));
|
||||||
|
Ops.push_back(*LowAdd);
|
||||||
|
Ops.push_back(*HiAdd);
|
||||||
|
|
||||||
|
SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
|
||||||
|
DAG.getVTList(MVT::i32, MVT::i32),
|
||||||
|
&Ops[0], Ops.size());
|
||||||
|
|
||||||
|
// Replace the uses of the ADDC/ADDE results with the MLAL node's values.
|
||||||
|
SDValue HiMLALResult(MLALNode.getNode(), 1);
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
|
||||||
|
|
||||||
|
SDValue LoMLALResult(MLALNode.getNode(), 0);
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
|
||||||
|
|
||||||
|
// Return original node to notify the driver to stop replacing.
|
||||||
|
SDValue resNode(AddcNode, 0);
|
||||||
|
return resNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// PerformADDCCombine - Target-specific dag combine transform from
|
||||||
|
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
|
||||||
|
static SDValue PerformADDCCombine(SDNode *N,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
|
const ARMSubtarget *Subtarget) {
|
||||||
|
|
||||||
|
return AddCombineTo64bitMLAL(N, DCI, Subtarget);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
|
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
|
||||||
/// operands N0 and N1. This is a helper for PerformADDCombine that is
|
/// operands N0 and N1. This is a helper for PerformADDCombine that is
|
||||||
/// called with the default operands, and if that fails, with commuted
|
/// called with the default operands, and if that fails, with commuted
|
||||||
@ -8738,6 +8893,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
DAGCombinerInfo &DCI) const {
|
DAGCombinerInfo &DCI) const {
|
||||||
switch (N->getOpcode()) {
|
switch (N->getOpcode()) {
|
||||||
default: break;
|
default: break;
|
||||||
|
case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
|
||||||
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
|
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
|
||||||
case ISD::SUB: return PerformSUBCombine(N, DCI);
|
case ISD::SUB: return PerformSUBCombine(N, DCI);
|
||||||
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
|
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
|
||||||
|
@ -176,6 +176,9 @@ namespace llvm {
|
|||||||
VMULLs, // ...signed
|
VMULLs, // ...signed
|
||||||
VMULLu, // ...unsigned
|
VMULLu, // ...unsigned
|
||||||
|
|
||||||
|
UMLAL, // 64bit Unsigned Accumulate Multiply
|
||||||
|
SMLAL, // 64bit Signed Accumulate Multiply
|
||||||
|
|
||||||
// Operands of the standard BUILD_VECTOR node are not legalized, which
|
// Operands of the standard BUILD_VECTOR node are not legalized, which
|
||||||
// is fine if BUILD_VECTORs are always lowered to shuffles or other
|
// is fine if BUILD_VECTORs are always lowered to shuffles or other
|
||||||
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
|
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
|
||||||
|
@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
|
|||||||
SDTCisInt<0>,
|
SDTCisInt<0>,
|
||||||
SDTCisVT<1, i32>,
|
SDTCisVT<1, i32>,
|
||||||
SDTCisVT<4, i32>]>;
|
SDTCisVT<4, i32>]>;
|
||||||
|
|
||||||
|
def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
||||||
|
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
|
||||||
|
SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
|
||||||
|
def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
|
||||||
|
def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
|
||||||
|
|
||||||
// Node definitions.
|
// Node definitions.
|
||||||
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
|
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
|
||||||
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
|
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
|
||||||
@ -3396,6 +3403,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
|
|||||||
let Inst{11-8} = Rm;
|
let Inst{11-8} = Rm;
|
||||||
let Inst{3-0} = Rn;
|
let Inst{3-0} = Rn;
|
||||||
}
|
}
|
||||||
|
class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
|
||||||
|
string opc, string asm, list<dag> pattern>
|
||||||
|
: AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
|
||||||
|
bits<4> RdLo;
|
||||||
|
bits<4> RdHi;
|
||||||
|
bits<4> Rm;
|
||||||
|
bits<4> Rn;
|
||||||
|
let Inst{19-16} = RdHi;
|
||||||
|
let Inst{15-12} = RdLo;
|
||||||
|
let Inst{11-8} = Rm;
|
||||||
|
let Inst{3-0} = Rn;
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME: The v5 pseudos are only necessary for the additional Constraint
|
// FIXME: The v5 pseudos are only necessary for the additional Constraint
|
||||||
// property. Remove them when it's possible to add those properties
|
// property. Remove them when it's possible to add those properties
|
||||||
@ -3478,14 +3497,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Multiply + accumulate
|
// Multiply + accumulate
|
||||||
def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
|
def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
|
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
|
||||||
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||||
Requires<[IsARM, HasV6]>;
|
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
|
||||||
def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
|
def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
|
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
|
||||||
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||||
Requires<[IsARM, HasV6]>;
|
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
|
||||||
|
|
||||||
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
|
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
|
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
|
||||||
@ -3501,17 +3520,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
|
|||||||
let Inst{3-0} = Rn;
|
let Inst{3-0} = Rn;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
|
let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in {
|
||||||
def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
|
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
|
||||||
4, IIC_iMAC64, [],
|
4, IIC_iMAC64, [],
|
||||||
(SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
|
(SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
|
||||||
|
pred:$p, cc_out:$s)>,
|
||||||
Requires<[IsARM, NoV6]>;
|
Requires<[IsARM, NoV6]>;
|
||||||
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
|
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
|
||||||
4, IIC_iMAC64, [],
|
4, IIC_iMAC64, [],
|
||||||
(UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
|
(UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
|
||||||
|
pred:$p, cc_out:$s)>,
|
||||||
Requires<[IsARM, NoV6]>;
|
Requires<[IsARM, NoV6]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
|
||||||
def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
|
||||||
(ins GPR:$Rn, GPR:$Rm, pred:$p),
|
(ins GPR:$Rn, GPR:$Rm, pred:$p),
|
||||||
4, IIC_iMAC64, [],
|
4, IIC_iMAC64, [],
|
||||||
|
@ -523,6 +523,23 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
|
|||||||
let Inst{7-4} = opc7_4;
|
let Inst{7-4} = opc7_4;
|
||||||
let Inst{3-0} = Rm;
|
let Inst{3-0} = Rm;
|
||||||
}
|
}
|
||||||
|
class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
|
||||||
|
dag oops, dag iops, InstrItinClass itin,
|
||||||
|
string opc, string asm, list<dag> pattern>
|
||||||
|
: T2I<oops, iops, itin, opc, asm, pattern> {
|
||||||
|
bits<4> RdLo;
|
||||||
|
bits<4> RdHi;
|
||||||
|
bits<4> Rn;
|
||||||
|
bits<4> Rm;
|
||||||
|
|
||||||
|
let Inst{31-23} = 0b111110111;
|
||||||
|
let Inst{22-20} = opc22_20;
|
||||||
|
let Inst{19-16} = Rn;
|
||||||
|
let Inst{15-12} = RdLo;
|
||||||
|
let Inst{11-8} = RdHi;
|
||||||
|
let Inst{7-4} = opc7_4;
|
||||||
|
let Inst{3-0} = Rm;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
|
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
|
||||||
@ -2410,15 +2427,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000,
|
|||||||
} // isCommutable
|
} // isCommutable
|
||||||
|
|
||||||
// Multiply + accumulate
|
// Multiply + accumulate
|
||||||
def t2SMLAL : T2MulLong<0b100, 0b0000,
|
def t2SMLAL : T2MlaLong<0b100, 0b0000,
|
||||||
(outs rGPR:$RdLo, rGPR:$RdHi),
|
(outs rGPR:$RdLo, rGPR:$RdHi, rGPR:$RLo, rGPR:$RHi),
|
||||||
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
|
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
|
||||||
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
|
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||||
|
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
|
||||||
|
|
||||||
def t2UMLAL : T2MulLong<0b110, 0b0000,
|
def t2UMLAL : T2MlaLong<0b110, 0b0000,
|
||||||
(outs rGPR:$RdLo, rGPR:$RdHi),
|
(outs rGPR:$RdLo, rGPR:$RdHi, rGPR:$RLo, rGPR:$RHi),
|
||||||
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
|
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
|
||||||
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
|
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||||
|
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
|
||||||
|
|
||||||
def t2UMAAL : T2MulLong<0b110, 0b0110,
|
def t2UMAAL : T2MulLong<0b110, 0b0110,
|
||||||
(outs rGPR:$RdLo, rGPR:$RdHi),
|
(outs rGPR:$RdLo, rGPR:$RdHi),
|
||||||
|
44
test/CodeGen/ARM/longMAC.ll
Normal file
44
test/CodeGen/ARM/longMAC.ll
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
; RUN: llc < %s -march=arm | FileCheck %s
|
||||||
|
; Check that signed and unsigned multiply-accumulate-long instructions are generated.
|
||||||
|
|
||||||
|
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
|
||||||
|
;CHECK: MACLongTest1:
|
||||||
|
;CHECK: umlal
|
||||||
|
%conv = zext i32 %a to i64
|
||||||
|
%conv1 = zext i32 %b to i64
|
||||||
|
%mul = mul i64 %conv1, %conv
|
||||||
|
%add = add i64 %mul, %c
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
|
||||||
|
;CHECK: MACLongTest2:
|
||||||
|
;CHECK: smlal
|
||||||
|
%conv = sext i32 %a to i64
|
||||||
|
%conv1 = sext i32 %b to i64
|
||||||
|
%mul = mul nsw i64 %conv1, %conv
|
||||||
|
%add = add nsw i64 %mul, %c
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
|
||||||
|
;CHECK: MACLongTest3:
|
||||||
|
;CHECK: umlal
|
||||||
|
%conv = zext i32 %b to i64
|
||||||
|
%conv1 = zext i32 %a to i64
|
||||||
|
%mul = mul i64 %conv, %conv1
|
||||||
|
%conv2 = zext i32 %c to i64
|
||||||
|
%add = add i64 %mul, %conv2
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
|
||||||
|
;CHECK: MACLongTest4:
|
||||||
|
;CHECK: smlal
|
||||||
|
%conv = sext i32 %b to i64
|
||||||
|
%conv1 = sext i32 %a to i64
|
||||||
|
%mul = mul nsw i64 %conv, %conv1
|
||||||
|
%conv2 = sext i32 %c to i64
|
||||||
|
%add = add nsw i64 %mul, %conv2
|
||||||
|
ret i64 %add
|
||||||
|
}
|
44
test/CodeGen/ARM/longMACt.ll
Normal file
44
test/CodeGen/ARM/longMACt.ll
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
|
||||||
|
; Check that signed and unsigned multiply-accumulate-long instructions are generated.
|
||||||
|
|
||||||
|
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
|
||||||
|
;CHECK: MACLongTest1:
|
||||||
|
;CHECK: umlal
|
||||||
|
%conv = zext i32 %a to i64
|
||||||
|
%conv1 = zext i32 %b to i64
|
||||||
|
%mul = mul i64 %conv1, %conv
|
||||||
|
%add = add i64 %mul, %c
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
|
||||||
|
;CHECK: MACLongTest2:
|
||||||
|
;CHECK: smlal
|
||||||
|
%conv = sext i32 %a to i64
|
||||||
|
%conv1 = sext i32 %b to i64
|
||||||
|
%mul = mul nsw i64 %conv1, %conv
|
||||||
|
%add = add nsw i64 %mul, %c
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
|
||||||
|
;CHECK: MACLongTest3:
|
||||||
|
;CHECK: umlal
|
||||||
|
%conv = zext i32 %b to i64
|
||||||
|
%conv1 = zext i32 %a to i64
|
||||||
|
%mul = mul i64 %conv, %conv1
|
||||||
|
%conv2 = zext i32 %c to i64
|
||||||
|
%add = add i64 %mul, %conv2
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
|
||||||
|
;CHECK: MACLongTest4:
|
||||||
|
;CHECK: smlal
|
||||||
|
%conv = sext i32 %b to i64
|
||||||
|
%conv1 = sext i32 %a to i64
|
||||||
|
%mul = mul nsw i64 %conv, %conv1
|
||||||
|
%conv2 = sext i32 %c to i64
|
||||||
|
%add = add nsw i64 %mul, %conv2
|
||||||
|
ret i64 %add
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user