mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
[SystemZ] Define remaining *MUL_LOHI patterns
The initial port used MLG(R) for i64 UMUL_LOHI but left the other three combinations as not-legal-or-custom. Although 32x32->{32,32} multiplications exist, they're not as quick as doing a normal 64-bit multiplication, so it didn't seem like i32 SMUL_LOHI and UMUL_LOHI would be useful. There's also no direct instruction for i64 SMUL_LOHI, so it needs to be implemented in terms of UMUL_LOHI. However, not defining these patterns means that we don't convert division by a constant into multiplication, so this patch fills in the other cases. The new i64 SMUL_LOHI sequence is simpler than the one that we used previously for 64x64->128 multiplication, so int-mul-08.ll now tests the full sequence. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188898 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f44026bf26
commit
df40f8e8ad
@ -128,9 +128,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
|
||||
setOperationAction(ISD::ROTR, VT, Expand);
|
||||
|
||||
// Use *MUL_LOHI where possible and a wider multiplication otherwise.
|
||||
// Use *MUL_LOHI where possible instead of MULH*.
|
||||
setOperationAction(ISD::MULHS, VT, Expand);
|
||||
setOperationAction(ISD::MULHU, VT, Expand);
|
||||
setOperationAction(ISD::SMUL_LOHI, VT, Custom);
|
||||
setOperationAction(ISD::UMUL_LOHI, VT, Custom);
|
||||
|
||||
// We have instructions for signed but not unsigned FP conversion.
|
||||
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
|
||||
@ -165,14 +167,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
|
||||
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
|
||||
setOperationAction(ISD::OR, MVT::i64, Custom);
|
||||
|
||||
// The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
|
||||
// but they aren't really worth using. There is no 64-bit SMUL_LOHI,
|
||||
// but there is a 64-bit UMUL_LOHI: MLGR.
|
||||
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
|
||||
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
|
||||
|
||||
// FIXME: Can we support these natively?
|
||||
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
|
||||
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
|
||||
@ -1142,6 +1136,20 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
|
||||
DL, MVT::Glue, CmpOp0, CmpOp1);
|
||||
}
|
||||
|
||||
// Implement a 32-bit *MUL_LOHI operation by extending both operands to
|
||||
// 64 bits. Extend is the extension type to use. Store the high part
|
||||
// in Hi and the low part in Lo.
//
// DAG is the SelectionDAG used to create the new nodes and DL is the
// location attached to each of them. Extend is passed straight to
// DAG.getNode as the opcode that widens each operand to i64; the callers
// visible in this listing pass ISD::SIGN_EXTEND and ISD::ZERO_EXTEND.
// Hi and Lo are output parameters and both receive i32 values.
|
||||
static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
|
||||
unsigned Extend, SDValue Op0, SDValue Op1,
|
||||
SDValue &Hi, SDValue &Lo) {
|
||||
// Widen both operands to i64 with the requested extension.
Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
|
||||
Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
|
||||
// A single i64 multiplication of the widened operands.
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
|
||||
// High half: shift bits 63..32 of the product down, then narrow to i32.
Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
|
||||
Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
|
||||
// Low half: truncating the product keeps bits 31..0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
|
||||
}
|
||||
|
||||
// Lower a binary operation that produces two VT results, one in each
|
||||
// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
|
||||
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
|
||||
@ -1427,18 +1435,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
||||
// Lower a signed multiplication that produces both halves of the product
// (reached from the ISD::SMUL_LOHI case of LowerOperation). The two results
// are collected in Ops[] -- Ops[0] is the low half and Ops[1] the high half --
// and returned as a single merged value.
//
// For a 32-bit VT both operands are sign-extended and multiplied as i64.
// Otherwise the code shifts by 63, i.e. it treats VT as 64-bit: it performs
// an unsigned UMUL_LOHI64 and then adjusts the high half for the signs of
// the operands.
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
SDLoc DL(Op);
|
||||
SDValue Ops[2];
|
||||
if (is32Bit(VT))
|
||||
// Just do a normal 64-bit multiplication and extract the results.
|
||||
// We define this so that it can be used for constant division.
|
||||
// (Ops[1] is passed as Hi and Ops[0] as Lo.)
lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
|
||||
Op.getOperand(1), Ops[1], Ops[0]);
|
||||
else {
|
||||
// Do a full 128-bit multiplication based on UMUL_LOHI64:
|
||||
//
|
||||
// (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
|
||||
//
|
||||
// but using the fact that the upper halves are either all zeros
|
||||
// or all ones:
|
||||
//
|
||||
// (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
|
||||
//
|
||||
// and grouping the right terms together since they are quicker than the
|
||||
// multiplication:
|
||||
//
|
||||
// (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
|
||||
SDValue C63 = DAG.getConstant(63, MVT::i64);
|
||||
SDValue LL = Op.getOperand(0);
|
||||
SDValue RL = Op.getOperand(1);
|
||||
// An arithmetic shift right by 63 replicates the sign bit, so LH and RH
// are the "upper halves" of the sign-extended operands: 0 or all ones.
SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
|
||||
SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
|
||||
// UMUL_LOHI64 returns the low result in the odd register and the high
|
||||
// result in the even register. SMUL_LOHI is defined to return the
|
||||
// low half first, so the results are in reverse order.
|
||||
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
|
||||
LL, RL, Ops[1], Ops[0]);
|
||||
// Because RH and LH are 0 or -1, each AND equals the negated product
// (x & -1 == x == -(x * -1)), which is why the sum is subtracted below.
SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
|
||||
SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
|
||||
SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
|
||||
// Only the high half needs the correction; the low half is left as is.
Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
|
||||
}
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
||||
// Lower an unsigned multiplication that produces both halves of the product
// (reached from the ISD::UMUL_LOHI case of LowerOperation). The results are
// collected in Ops[] -- Ops[0] is the low half and Ops[1] the high half --
// and returned as a single merged value.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so it seems to contain both the old body (the assert and the unconditional
// lowerGR128Binary call) and the new body (the if/else that also handles
// 32-bit types). The assert contradicts the later is32Bit branch -- confirm
// against the real file which lines are actually present.
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
SDLoc DL(Op);
|
||||
// NOTE(review): presumably a removed (pre-patch) line; see the note above.
assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
|
||||
|
||||
// UMUL_LOHI64 returns the low result in the odd register and the high
|
||||
// result in the even register. UMUL_LOHI is defined to return the
|
||||
// low half first, so the results are in reverse order.
|
||||
SDValue Ops[2];
|
||||
// NOTE(review): presumably a removed (pre-patch) call; the same call
// appears again in the else branch below.
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
|
||||
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
|
||||
if (is32Bit(VT))
|
||||
// Just do a normal 64-bit multiplication and extract the results.
|
||||
// We define this so that it can be used for constant division.
|
||||
// (Operands are zero-extended; Ops[1] is passed as Hi and Ops[0] as Lo.)
lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
|
||||
Op.getOperand(1), Ops[1], Ops[0]);
|
||||
else
|
||||
// UMUL_LOHI64 returns the low result in the odd register and the high
|
||||
// result in the even register. UMUL_LOHI is defined to return the
|
||||
// low half first, so the results are in reverse order.
|
||||
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
|
||||
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
||||
@ -1706,6 +1760,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
|
||||
return lowerVACOPY(Op, DAG);
|
||||
case ISD::DYNAMIC_STACKALLOC:
|
||||
return lowerDYNAMIC_STACKALLOC(Op, DAG);
|
||||
case ISD::SMUL_LOHI:
|
||||
return lowerSMUL_LOHI(Op, DAG);
|
||||
case ISD::UMUL_LOHI:
|
||||
return lowerUMUL_LOHI(Op, DAG);
|
||||
case ISD::SDIVREM:
|
||||
|
@ -214,6 +214,7 @@ private:
|
||||
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
56
test/CodeGen/SystemZ/int-div-06.ll
Normal file
56
test/CodeGen/SystemZ/int-div-06.ll
Normal file
@ -0,0 +1,56 @@
|
||||
; Test that divisions by constants are implemented as multiplications.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
; Check signed 32-bit division.
|
||||
define i32 @f1(i32 %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: lgfr [[REG:%r[0-5]]], %r2
|
||||
; CHECK: msgfi [[REG]], 502748801
|
||||
; CHECK-DAG: srlg [[RES1:%r[0-5]]], [[REG]], 63
|
||||
; CHECK-DAG: srag %r2, [[REG]], 46
|
||||
; CHECK: ar %r2, [[RES1]]
|
||||
; CHECK: br %r14
|
||||
%b = sdiv i32 %a, 139968
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Check unsigned 32-bit division.
|
||||
define i32 @f2(i32 %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: llgfr [[REG:%r[0-5]]], %r2
|
||||
; CHECK: msgfi [[REG]], 502748801
|
||||
; CHECK: srlg %r2, [[REG]], 46
|
||||
; CHECK: br %r14
|
||||
%b = udiv i32 %a, 139968
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Check signed 64-bit division.
|
||||
define i64 @f3(i64 %dummy, i64 %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK-DAG: llihf [[CONST:%r[0-5]]], 1005497601
|
||||
; CHECK-DAG: oilf [[CONST]], 4251762321
|
||||
; CHECK-DAG: srag [[REG:%r[0-5]]], %r3, 63
|
||||
; CHECK-DAG: ngr [[REG]], [[CONST]]
|
||||
; CHECK-DAG: mlgr %r2, [[CONST]]
|
||||
; CHECK: sgr %r2, [[REG]]
|
||||
; CHECK: srlg [[RES1:%r[0-5]]], %r2, 63
|
||||
; CHECK: srag %r2, %r2, 15
|
||||
; CHECK: agr %r2, [[RES1]]
|
||||
; CHECK: br %r14
|
||||
%b = sdiv i64 %a, 139968
|
||||
ret i64 %b
|
||||
}
|
||||
|
||||
; Check unsigned 64-bit division.
|
||||
define i64 @f4(i64 %dummy, i64 %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: llihf [[CONST:%r[0-5]]], 1005497601
|
||||
; CHECK: oilf [[CONST]], 4251762321
|
||||
; CHECK: mlgr %r2, [[CONST]]
|
||||
; CHECK: srlg %r2, %r2, 15
|
||||
; CHECK: br %r14
|
||||
%b = udiv i64 %a, 139968
|
||||
ret i64 %b
|
||||
}
|
@ -22,9 +22,13 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
|
||||
; This needs a rather convoluted sequence.
|
||||
define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: mlgr
|
||||
; CHECK: agr
|
||||
; CHECK: agr
|
||||
; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63
|
||||
; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63
|
||||
; CHECK-DAG: ngr [[RES1]], %r4
|
||||
; CHECK-DAG: ngr [[RES2]], %r3
|
||||
; CHECK-DAG: agr [[RES2]], [[RES1]]
|
||||
; CHECK-DAG: mlgr %r2, %r4
|
||||
; CHECK: sgr %r2, [[RES2]]
|
||||
; CHECK: br %r14
|
||||
%ax = sext i64 %a to i128
|
||||
%bx = sext i64 %b to i128
|
||||
|
Loading…
x
Reference in New Issue
Block a user