diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 955b88e1630..da4ad38e465 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1269,18 +1269,23 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); + unsigned Opcode; // We use DSGF for 32-bit division. if (is32Bit(VT)) { Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); - } + Opcode = SystemZISD::SDIVREM32; + } else if (DAG.ComputeNumSignBits(Op1) > 32) { + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + Opcode = SystemZISD::SDIVREM32; + } else + Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 // input is "don't care". The instruction returns the remainder in // the even register and the quotient in the odd register. SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f6c49f066a9..21b4d724502 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -68,6 +68,7 @@ namespace SystemZISD { // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. UMUL_LOHI64, + SDIVREM32, SDIVREM64, UDIVREM32, UDIVREM64, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 1b53eb0a428..44be5da4e25 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -750,14 +750,13 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; //===----------------------------------------------------------------------===// // Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; +def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; -defm : SXB; // Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>; +def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load>; def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 021824e23c5..a84af7a8067 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -81,6 +81,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll index 492ece91497..9a0066f0d0a 100644 --- a/test/CodeGen/SystemZ/int-div-01.ll +++ b/test/CodeGen/SystemZ/int-div-01.ll @@ -2,6 +2,8 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @foo() + ; Test register division. The result is in the second of the two registers. define void @f1(i32 *%dest, i32 %a, i32 %b) { ; CHECK: f1: @@ -188,3 +190,19 @@ define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) { %rem = srem i32 %a, %b ret i32 %rem } + +; Make sure that we still use DSGFR rather than DSGR in cases where +; a load and division cannot be combined. +define void @f15(i32 *%dest, i32 *%src) { +; CHECK: f15: +; CHECK: l [[B:%r[0-9]+]], 0(%r3) +; CHECK: brasl %r14, foo@PLT +; CHECK: lgfr %r1, %r2 +; CHECK: dsgfr %r0, [[B]] +; CHECK: br %r14 + %b = load i32 *%src + %a = call i32 @foo() + %div = sdiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll index b950f2b0203..652fddc1be3 100644 --- a/test/CodeGen/SystemZ/int-div-03.ll +++ b/test/CodeGen/SystemZ/int-div-03.ll @@ -3,6 +3,8 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Test register division. The result is in the second of the two registers. define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { ; CHECK: f1: @@ -187,3 +189,20 @@ define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) { %rem = srem i64 %a, %bext ret i64 %rem } + +; Make sure that we still use DSGFR rather than DSGR in cases where +; a load and division cannot be combined. +define void @f15(i64 *%dest, i32 *%src) { +; CHECK: f15: +; CHECK: l [[B:%r[0-9]+]], 0(%r3) +; CHECK: brasl %r14, foo@PLT +; CHECK: lgr %r1, %r2 +; CHECK: dsgfr %r0, [[B]] +; CHECK: br %r14 + %b = load i32 *%src + %a = call i64 @foo() + %ext = sext i32 %b to i64 + %div = sdiv i64 %a, %ext + store i64 %div, i64 *%dest + ret void +}