mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
[SystemZ] Use DSGFR over DSGR in more cases
Fixes some cases where we were using full 64-bit division for (sdiv i32, i32) and (sdiv i64, i32). The "32" in "SDIVREM32" just refers to the width of the second operand; the first operand of all *DIVREM*s is a GR128.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185435 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1ce4894a3f
commit
35b7bebe11
@ -1269,18 +1269,23 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
EVT VT = Op.getValueType();
|
||||
SDLoc DL(Op);
|
||||
unsigned Opcode;
|
||||
|
||||
// We use DSGF for 32-bit division.
|
||||
if (is32Bit(VT)) {
|
||||
Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
|
||||
Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1);
|
||||
}
|
||||
Opcode = SystemZISD::SDIVREM32;
|
||||
} else if (DAG.ComputeNumSignBits(Op1) > 32) {
|
||||
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
|
||||
Opcode = SystemZISD::SDIVREM32;
|
||||
} else
|
||||
Opcode = SystemZISD::SDIVREM64;
|
||||
|
||||
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
|
||||
// input is "don't care". The instruction returns the remainder in
|
||||
// the even register and the quotient in the odd register.
|
||||
SDValue Ops[2];
|
||||
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64,
|
||||
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
|
||||
Op0, Op1, Ops[1], Ops[0]);
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
@ -68,6 +68,7 @@ namespace SystemZISD {
|
||||
// first input operands are GR128s. The trailing numbers are the
|
||||
// widths of the second operand in bits.
|
||||
UMUL_LOHI64,
|
||||
SDIVREM32,
|
||||
SDIVREM64,
|
||||
UDIVREM32,
|
||||
UDIVREM64,
|
||||
|
@ -750,14 +750,13 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Division and remainder, from registers.
|
||||
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
|
||||
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
|
||||
def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
|
||||
def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
|
||||
def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
|
||||
defm : SXB<z_sdivrem64, GR128, DSGFR>;
|
||||
|
||||
// Division and remainder, from memory.
|
||||
def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>;
|
||||
def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load>;
|
||||
def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>;
|
||||
def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>;
|
||||
def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;
|
||||
|
@ -81,6 +81,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
|
||||
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
|
||||
SDT_ZExtractAccess>;
|
||||
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
|
||||
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
|
||||
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
|
||||
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
|
||||
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
|
||||
|
@ -2,6 +2,8 @@
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
declare i32 @foo()
|
||||
|
||||
; Test register division. The result is in the second of the two registers.
|
||||
define void @f1(i32 *%dest, i32 %a, i32 %b) {
|
||||
; CHECK: f1:
|
||||
@ -188,3 +190,19 @@ define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
|
||||
%rem = srem i32 %a, %b
|
||||
ret i32 %rem
|
||||
}
|
||||
|
||||
; Make sure that we still use DSGFR rather than DSGR in cases where
|
||||
; a load and division cannot be combined.
|
||||
define void @f15(i32 *%dest, i32 *%src) {
|
||||
; CHECK: f15:
|
||||
; CHECK: l [[B:%r[0-9]+]], 0(%r3)
|
||||
; CHECK: brasl %r14, foo@PLT
|
||||
; CHECK: lgfr %r1, %r2
|
||||
; CHECK: dsgfr %r0, [[B]]
|
||||
; CHECK: br %r14
|
||||
%b = load i32 *%src
|
||||
%a = call i32 @foo()
|
||||
%div = sdiv i32 %a, %b
|
||||
store i32 %div, i32 *%dest
|
||||
ret void
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
declare i64 @foo()
|
||||
|
||||
; Test register division. The result is in the second of the two registers.
|
||||
define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
|
||||
; CHECK: f1:
|
||||
@ -187,3 +189,20 @@ define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) {
|
||||
%rem = srem i64 %a, %bext
|
||||
ret i64 %rem
|
||||
}
|
||||
|
||||
; Make sure that we still use DSGFR rather than DSGR in cases where
|
||||
; a load and division cannot be combined.
|
||||
define void @f15(i64 *%dest, i32 *%src) {
|
||||
; CHECK: f15:
|
||||
; CHECK: l [[B:%r[0-9]+]], 0(%r3)
|
||||
; CHECK: brasl %r14, foo@PLT
|
||||
; CHECK: lgr %r1, %r2
|
||||
; CHECK: dsgfr %r0, [[B]]
|
||||
; CHECK: br %r14
|
||||
%b = load i32 *%src
|
||||
%a = call i64 @foo()
|
||||
%ext = sext i32 %b to i64
|
||||
%div = sdiv i64 %a, %ext
|
||||
store i64 %div, i64 *%dest
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user