Properly handle divides. As a bonus - implement memory versions of them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76003 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Anton Korobeynikov
2009-07-16 14:14:33 +00:00
parent d20af96f5b
commit 0a42d2b437
6 changed files with 313 additions and 66 deletions
+206
View File
@@ -30,6 +30,10 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
// Subregister indices handed (as target constants) to the INSERT_SUBREG /
// EXTRACT_SUBREG nodes built during instruction selection.
// Note that subreg_32bit and subreg_even share the same numeric value.
static const unsigned subreg_32bit = 1;
// Even half of a register pair; Select() extracts the remainder from it.
static const unsigned subreg_even = 1;
// Odd half of a register pair; Select() inserts the dividend into it and
// extracts the quotient from it.
static const unsigned subreg_odd = 2;
namespace {
/// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
/// instead of register numbers for the leaves of the matched tree.
@@ -129,6 +133,10 @@ namespace {
SDValue &Base, SDValue &Disp, SDValue &Index);
SDNode *Select(SDValue Op);
bool TryFoldLoad(SDValue P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index);
bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
bool is12Bit, unsigned Depth = 0);
bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
@@ -573,6 +581,15 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr,
return false;
}
/// TryFoldLoad - Decide whether the value N can be folded into its user P as
/// a memory operand. On success the matched address components are written
/// to Base, Disp and Index and the result of SelectAddrRRI20 is returned;
/// otherwise false is returned.
bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
                                      SDValue &Base, SDValue &Disp,
                                      SDValue &Index) {
  SDNode *LoadNode = N.getNode();
  SDNode *UserNode = P.getNode();

  // Only plain, non-extending loads are candidates.
  if (!ISD::isNON_EXTLoad(LoadNode))
    return false;

  // The loaded value must not be used anywhere else.
  if (!N.hasOneUse())
    return false;

  // Let the generic legality / profitability check have the final say.
  if (!IsLegalAndProfitableToFold(LoadNode, UserNode, UserNode))
    return false;

  // Operand #1 of the load is what gets matched as the rri address.
  return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
}
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SystemZDAGToDAGISel::InstructionSelect() {
@@ -593,7 +610,9 @@ void SystemZDAGToDAGISel::InstructionSelect() {
SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
SDNode *Node = Op.getNode();
MVT NVT = Node->getValueType(0);
DebugLoc dl = Op.getDebugLoc();
unsigned Opcode = Node->getOpcode();
// Dump information about the Node being selected
#ifndef NDEBUG
@@ -611,8 +630,195 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
DOUT << "\n";
Indent -= 2;
#endif
return NULL; // Already selected.
}
switch (Opcode) {
default: break;
case ISD::SDIVREM: {
  // Custom selection of the combined signed divide/remainder node.
  // The dividend is inserted into the odd half of a register pair (ResVT),
  // the divide is emitted on that pair (register or folded-memory form), and
  // the quotient / remainder are extracted from the odd / even halves for
  // whichever of the two results actually has users.
  unsigned Opc, MOpc, ClrOpc = 0;
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  MVT ResVT;
  switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i32:
      Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
      ClrOpc = SystemZ::MOV32ri16;
      ResVT = MVT::v2i32;
      break;
    case MVT::i64:
      Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
      ResVT = MVT::v2i64;
      break;
  }

  // Try to fold a load of the divisor into the divide itself.
  SDValue Tmp0, Tmp1, Tmp2;
  bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);

  // Prepare the dividend
  SDNode *Dividend = N0.getNode();

  // Insert prepared dividend into suitable 'subreg'
  SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
                                      dl, ResVT);
  Dividend =
    CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
                          SDValue(Tmp, 0), SDValue(Dividend, 0),
                          CurDAG->getTargetConstant(subreg_odd, MVT::i32));

  // Zero out even subreg, if needed (only the i32 case sets ClrOpc).
  // NOTE(review): for a *signed* 32-bit divide, clearing the even half rather
  // than sign-extending the dividend into it looks wrong for negative
  // dividends - confirm against the z/Architecture definition of 'dr'.
  if (ClrOpc) {
    SDNode *ZeroHi = CurDAG->getTargetNode(ClrOpc, dl, NVT,
                                   CurDAG->getTargetConstant(0, MVT::i32));
    Dividend =
      CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
                            SDValue(Dividend, 0),
                            SDValue(ZeroHi, 0),
                            CurDAG->getTargetConstant(subreg_even, MVT::i32));
  }

  SDNode *Result;
  SDValue DivVal = SDValue(Dividend, 0);
  if (foldedLoad) {
    // Operands: dividend pair, the three address components, load's chain.
    SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
    // The memory form consumes the load's input chain, so it must produce a
    // chain result (MVT::Other) of its own in addition to the data result.
    Result = CurDAG->getTargetNode(MOpc, dl, ResVT, MVT::Other,
                                   Ops, array_lengthof(Ops));
    // Update the chain: users of the folded load's chain are redirected to
    // the divide's chain result (value #1), not to its data result.
    ReplaceUses(N1.getValue(1), SDValue(Result, 1));
  } else {
    Result = CurDAG->getTargetNode(Opc, dl, ResVT, DivVal, N1);
  }

  // Copy the division (odd subreg) result, if it is needed.
  if (!Op.getValue(0).use_empty()) {
    SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
                                        dl, NVT,
                                        SDValue(Result, 0),
                                        CurDAG->getTargetConstant(subreg_odd,
                                                                  MVT::i32));
    ReplaceUses(Op.getValue(0), SDValue(Div, 0));
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(Result->dump(CurDAG));
    DOUT << "\n";
#endif
  }

  // Copy the remainder (even subreg) result, if it is needed.
  if (!Op.getValue(1).use_empty()) {
    SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
                                        dl, NVT,
                                        SDValue(Result, 0),
                                        CurDAG->getTargetConstant(subreg_even,
                                                                  MVT::i32));
    ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(Result->dump(CurDAG));
    DOUT << "\n";
#endif
  }

#ifndef NDEBUG
  Indent -= 2;
#endif

  // All uses were replaced by hand above; nothing more to select.
  return NULL;
}
case ISD::UDIVREM: {
  // Custom selection of the combined unsigned divide/remainder node.
  // The dividend is inserted into the odd half of a register pair (ResVT),
  // the even half is cleared, the divide is emitted on that pair (register or
  // folded-memory form), and the quotient / remainder are extracted from the
  // odd / even halves for whichever of the two results actually has users.
  unsigned Opc, MOpc, ClrOpc;
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  MVT ResVT;
  switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i32:
      Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
      ClrOpc = SystemZ::MOV32ri16;
      ResVT = MVT::v2i32;
      break;
    case MVT::i64:
      Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
      ClrOpc = SystemZ::MOV64ri16;
      ResVT = MVT::v2i64;
      break;
  }

  // Try to fold a load of the divisor into the divide itself.
  SDValue Tmp0, Tmp1, Tmp2;
  bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);

  // Prepare the dividend
  SDNode *Dividend = N0.getNode();

  // Insert prepared dividend into suitable 'subreg'
  SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
                                      dl, ResVT);
  Dividend =
    CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
                          SDValue(Tmp, 0), SDValue(Dividend, 0),
                          CurDAG->getTargetConstant(subreg_odd, MVT::i32));

  // Zero out even subreg (done for both supported widths).
  SDNode *ZeroHi = CurDAG->getTargetNode(ClrOpc, dl, NVT,
                                         CurDAG->getTargetConstant(0,
                                                                   MVT::i32));
  Dividend =
    CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
                          SDValue(Dividend, 0),
                          SDValue(ZeroHi, 0),
                          CurDAG->getTargetConstant(subreg_even, MVT::i32));

  SDValue DivVal = SDValue(Dividend, 0);
  SDNode *Result;
  if (foldedLoad) {
    // Operands: dividend pair, the three address components, load's chain.
    SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
    // The memory form consumes the load's input chain, so it must produce a
    // chain result (MVT::Other) of its own in addition to the data result.
    Result = CurDAG->getTargetNode(MOpc, dl, ResVT, MVT::Other,
                                   Ops, array_lengthof(Ops));
    // Update the chain: users of the folded load's chain are redirected to
    // the divide's chain result (value #1), not to its data result.
    ReplaceUses(N1.getValue(1), SDValue(Result, 1));
  } else {
    Result = CurDAG->getTargetNode(Opc, dl, ResVT, DivVal, N1);
  }

  // Copy the division (odd subreg) result, if it is needed.
  if (!Op.getValue(0).use_empty()) {
    SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
                                        dl, NVT,
                                        SDValue(Result, 0),
                                        CurDAG->getTargetConstant(subreg_odd,
                                                                  MVT::i32));
    ReplaceUses(Op.getValue(0), SDValue(Div, 0));
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(Result->dump(CurDAG));
    DOUT << "\n";
#endif
  }

  // Copy the remainder (even subreg) result, if it is needed.
  if (!Op.getValue(1).use_empty()) {
    SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
                                        dl, NVT,
                                        SDValue(Result, 0),
                                        CurDAG->getTargetConstant(subreg_even,
                                                                  MVT::i32));
    ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(Result->dump(CurDAG));
    DOUT << "\n";
#endif
  }

#ifndef NDEBUG
  Indent -= 2;
#endif

  // All uses were replaced by hand above; nothing more to select.
  return NULL;
}
}
// Select the default instruction
SDNode *ResNode = SelectCode(Op);
@@ -43,7 +43,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
// Set up the register classes.
addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
addRegisterClass(MVT::i128, SystemZ::GR128RegisterClass);
addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
// Compute derived properties from the register classes
computeRegisterProperties();
@@ -70,6 +72,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::SDIV, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// FIXME: Can we lower these 2 efficiently?
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
+26 -60
View File
@@ -580,22 +580,34 @@ def MULSX64rr32 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
"msgfr\t{$dst, $src2}",
[(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
def SDIVREM64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
"dr\t{$dst, $src2}",
[]>;
def SDIVREM32r : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
"dr\t{$dst, $src2}",
[]>;
def SDIVREM64r : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
"dsgr\t{$dst, $src2}",
[]>;
def SDIVREM128rrP : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
"dsgr\t{$dst, $src2}",
[]>;
def UDIVREM64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
"dlr\t{$dst, $src2}",
[]>;
def UDIVREM128rrP : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
"dlgr\t{$dst, $src2}",
[]>;
def UDIVREM32r : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
"dlr\t{$dst, $src2}",
[]>;
def UDIVREM64r : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
"dlgr\t{$dst, $src2}",
[]>;
let mayLoad = 1 in {
def SDIVREM32m : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
"d\t{$dst, $src2}",
[]>;
def SDIVREM64m : Pseudo<(outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
"dsg\t{$dst, $src2}",
[]>;
def UDIVREM32m : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
"dl\t{$dst, $src2}",
[]>;
def UDIVREM64m : Pseudo<(outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
"dlg\t{$dst, $src2}",
[]>;
} // mayLoad
} // isTwoAddress = 1
//===----------------------------------------------------------------------===//
@@ -790,51 +802,5 @@ def : Pat<(mulhu GR64:$src1, GR64:$src2),
GR64:$src2),
subreg_even)>;
// divs
// FIXME: Add memory versions
def : Pat<(sdiv GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (SDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd),
GR32:$src2),
subreg_odd)>;
def : Pat<(sdiv GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (SDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
GR64:$src2),
subreg_odd)>;
def : Pat<(udiv GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (UDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd),
GR32:$src2),
subreg_odd)>;
def : Pat<(udiv GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (UDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
GR64:$src2),
subreg_odd)>;
// rems
// FIXME: Add memory versions
def : Pat<(srem GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (SDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd),
GR32:$src2),
subreg_even)>;
def : Pat<(srem GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (SDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
GR64:$src2),
subreg_even)>;
def : Pat<(urem GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (UDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd),
GR32:$src2),
subreg_even)>;
def : Pat<(urem GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (UDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
GR64:$src2),
subreg_even)>;
def : Pat<(i32 imm:$src),
(EXTRACT_SUBREG (MOV64ri32 (i64 imm:$src)), subreg_32bit)>;
+2 -2
View File
@@ -332,7 +332,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
}
// Even-odd register pairs
def GR64P : RegisterClass<"SystemZ", [i64], 64,
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
[R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
{
let SubRegClassList = [GR32, GR32];
@@ -368,7 +368,7 @@ def GR64P : RegisterClass<"SystemZ", [i64], 64,
}];
}
def GR128 : RegisterClass<"SystemZ", [i128], 128,
def GR128 : RegisterClass<"SystemZ", [i128, v2i64], 128,
[R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
{
let SubRegClassList = [GR64, GR64];
+4 -4
View File
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc | grep dsgr | count 2
; RUN: llvm-as < %s | llc | grep dr | count 2
; RUN: llvm-as < %s | llc | grep dlr | count 2
; RUN: llvm-as < %s | llc | grep dlgr | count 2
; RUN: llvm-as < %s | llc | grep dsgr | count 2
; RUN: llvm-as < %s | llc | grep dr | count 2
; RUN: llvm-as < %s | llc | grep dlr | count 2
; RUN: llvm-as < %s | llc | grep dlgr | count 2
target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
target triple = "s390x-unknown-linux-gnu"
+64
View File
@@ -0,0 +1,64 @@
; RUN: llvm-as < %s | llc | grep {d.%} | count 2
; RUN: llvm-as < %s | llc | grep dsg | count 2
; RUN: llvm-as < %s | llc | grep {dl.%} | count 2
; RUN: llvm-as < %s | llc | grep dlg | count 2
target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
target triple = "s390x-unknown-linux-gnu"
; Signed 64-bit division whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i64 @div(i64 %a, i64* %b) nounwind readonly {
entry:
  %b1 = load i64* %b
  %div = sdiv i64 %a, %b1
  ret i64 %div
}
; Unsigned 64-bit division whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i64 @div1(i64 %a, i64* %b) nounwind readonly {
entry:
  %b1 = load i64* %b
  %div = udiv i64 %a, %b1
  ret i64 %div
}
; Signed 64-bit remainder whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i64 @rem(i64 %a, i64* %b) nounwind readonly {
entry:
  %b1 = load i64* %b
  %div = srem i64 %a, %b1
  ret i64 %div
}
; Unsigned 64-bit remainder whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i64 @rem1(i64 %a, i64* %b) nounwind readonly {
entry:
  %b1 = load i64* %b
  %div = urem i64 %a, %b1
  ret i64 %div
}
; Signed 32-bit division whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i32 @div2(i32 %a, i32* %b) nounwind readonly {
entry:
  %b1 = load i32* %b
  %div = sdiv i32 %a, %b1
  ret i32 %div
}
; Unsigned 32-bit division whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i32 @div3(i32 %a, i32* %b) nounwind readonly {
entry:
  %b1 = load i32* %b
  %div = udiv i32 %a, %b1
  ret i32 %div
}
; Signed 32-bit remainder whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i32 @rem2(i32 %a, i32* %b) nounwind readonly {
entry:
  %b1 = load i32* %b
  %div = srem i32 %a, %b1
  ret i32 %div
}
; Unsigned 32-bit remainder whose divisor is loaded from memory.
; The function reads through %b, so it is 'readonly', not 'readnone'.
define i32 @rem3(i32 %a, i32* %b) nounwind readonly {
entry:
  %b1 = load i32* %b
  %div = urem i32 %a, %b1
  ret i32 %div
}