[X86] Make isel select the 2-byte register form of INC/DEC even in non-64-bit mode. Convert to the 1-byte form in non-64-bit mode as part of MCInst lowering.

Overall this seems simpler. It reduces duplication of patterns between both modes and it simplifies the memory folding/unfolding tables as they don't need to create fake instructions just to keep track of 64-bitness.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225252 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2015-01-06 07:35:50 +00:00
parent 49fe2a5a5c
commit 88dc6cd5c9
5 changed files with 78 additions and 126 deletions

View File

@ -2370,19 +2370,16 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned ResultReg = 0;
// Check if we have an immediate version.
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
static const unsigned Opc[2][2][4] = {
{ { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } },
{ { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r },
{ X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } }
static const unsigned Opc[2][4] = {
{ X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
ResultReg = createResultReg(TLI.getRegClassFor(VT));
bool Is64Bit = Subtarget->is64Bit();
bool IsDec = BaseOpc == X86ISD::DEC;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
} else
ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,

View File

@ -456,33 +456,29 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))],
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst", [], IIC_UNARY_REG>,
OpSize16, Requires<[Not64BitMode]>;
def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst", [], IIC_UNARY_REG>,
OpSize32, Requires<[Not64BitMode]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
IIC_UNARY_REG>, OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>, OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))],
IIC_UNARY_REG>;
} // isConvertibleToThreeAddress = 1, CodeSize = 1
// In 64-bit mode, single byte INC and DEC cannot be encoded.
let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst", [], IIC_UNARY_REG>, OpSize16;
def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst", [], IIC_UNARY_REG>, OpSize32;
def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst", [], IIC_UNARY_REG>, OpSize16;
def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst", [], IIC_UNARY_REG>, OpSize32;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms: opcode 0x40 with the register folded into the opcode (AddRegFrm).
// They cannot be encoded in 64-bit mode, hence Requires<[Not64BitMode]> -- the
// predicate is "not 64-bit", not "32-bit only".
// They carry no selection pattern, so isel never produces them directly; MCInst
// lowering substitutes them for INC16r/INC32r when the subtarget is not 64-bit.
let CodeSize = 1, hasSideEffects = 0 in {
def INC16r_alt : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst", [], IIC_UNARY_REG>,
OpSize16, Requires<[Not64BitMode]>;
def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst", [], IIC_UNARY_REG>,
OpSize32, Requires<[Not64BitMode]>;
} // CodeSize = 1, hasSideEffects = 0
} // Constraints = "$src1 = $dst", SchedRW
let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
@ -491,35 +487,13 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
(implicit EFLAGS)], IIC_UNARY_MEM>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize16, Requires<[Not64BitMode]>;
(implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize32, Requires<[Not64BitMode]>;
(implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize16, Requires<[In64BitMode]>;
def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize32, Requires<[In64BitMode]>;
def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize16, Requires<[In64BitMode]>;
def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize32, Requires<[In64BitMode]>;
} // CodeSize = 2, SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
@ -528,17 +502,29 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst", [], IIC_UNARY_REG>,
OpSize16, Requires<[Not64BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst", [], IIC_UNARY_REG>,
OpSize32, Requires<[Not64BitMode]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>, OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>, OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms: opcode 0x48 with the register folded into the opcode (AddRegFrm).
// They cannot be encoded in 64-bit mode, hence Requires<[Not64BitMode]> -- the
// predicate is "not 64-bit", not "32-bit only".
// They carry no selection pattern, so isel never produces them directly; MCInst
// lowering substitutes them for DEC16r/DEC32r when the subtarget is not 64-bit.
let CodeSize = 1, hasSideEffects = 0 in {
def DEC16r_alt : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst", [], IIC_UNARY_REG>,
OpSize16, Requires<[Not64BitMode]>;
def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst", [], IIC_UNARY_REG>,
OpSize32, Requires<[Not64BitMode]>;
} // CodeSize = 1, hasSideEffects = 0
} // Constraints = "$src1 = $dst", SchedRW
@ -548,32 +534,16 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
(implicit EFLAGS)], IIC_UNARY_MEM>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize16, Requires<[Not64BitMode]>;
(implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize32, Requires<[Not64BitMode]>;
(implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
let Predicates = [Not64BitMode] in {
def : Pat<(X86inc_flag GR16:$src1), (INC16r GR16:$src1)>;
def : Pat<(X86inc_flag GR32:$src1), (INC32r GR32:$src1)>;
def : Pat<(X86dec_flag GR16:$src1), (DEC16r GR16:$src1)>;
def : Pat<(X86dec_flag GR32:$src1), (DEC32r GR32:$src1)>;
}
let Predicates = [In64BitMode] in {
def : Pat<(X86inc_flag GR16:$src1), (INC64_16r GR16:$src1)>;
def : Pat<(X86inc_flag GR32:$src1), (INC64_32r GR32:$src1)>;
def : Pat<(X86dec_flag GR16:$src1), (DEC64_16r GR16:$src1)>;
def : Pat<(X86dec_flag GR32:$src1), (DEC64_32r GR32:$src1)>;
}
/// X86TypeInfo - This is a bunch of information that describes relevant X86
/// information about value types. For example, it can tell you what the
/// register class and preferred load to use.

View File

@ -1724,35 +1724,18 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
// Increment reg.
// Do not make INC if it is slow
def : Pat<(add GR8:$src, 1),
(INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
def : Pat<(add GR16:$src, 1),
(INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
def : Pat<(add GR16:$src, 1),
(INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
def : Pat<(add GR32:$src, 1),
(INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
def : Pat<(add GR32:$src, 1),
(INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
def : Pat<(add GR64:$src, 1),
(INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// Decrement reg.
// Do not make DEC if it is slow
def : Pat<(add GR8:$src, -1),
(DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
def : Pat<(add GR16:$src, -1),
(DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
def : Pat<(add GR16:$src, -1),
(DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
def : Pat<(add GR32:$src, -1),
(DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
def : Pat<(add GR32:$src, -1),
(DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
def : Pat<(add GR64:$src, -1),
(DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// Increment/Decrement reg.
// Do not make INC/DEC if it is slow: every pattern in this group is gated on
// the NotSlowIncDec predicate.
// None of these patterns is predicated on 64-bit mode; the same
// INC16r/INC32r/DEC16r/DEC32r opcodes are selected in every mode.
let Predicates = [NotSlowIncDec] in {
// add reg, 1 -> INC, one pattern per register width.
def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
// add reg, -1 -> DEC, one pattern per register width.
def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
}
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;

View File

@ -146,14 +146,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::AND8rr, X86::AND8mr, 0 },
{ X86::DEC16r, X86::DEC16m, 0 },
{ X86::DEC32r, X86::DEC32m, 0 },
{ X86::DEC64_16r, X86::DEC64_16m, 0 },
{ X86::DEC64_32r, X86::DEC64_32m, 0 },
{ X86::DEC64r, X86::DEC64m, 0 },
{ X86::DEC8r, X86::DEC8m, 0 },
{ X86::INC16r, X86::INC16m, 0 },
{ X86::INC32r, X86::INC32m, 0 },
{ X86::INC64_16r, X86::INC64_16m, 0 },
{ X86::INC64_32r, X86::INC64_32m, 0 },
{ X86::INC64r, X86::INC64m, 0 },
{ X86::INC8r, X86::INC8m, 0 },
{ X86::NEG16r, X86::NEG16m, 0 },
@ -2175,11 +2171,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
break;
}
case X86::INC16r:
case X86::INC64_16r:
addRegOffset(MIB, leaInReg, true, 1);
break;
case X86::DEC16r:
case X86::DEC64_16r:
addRegOffset(MIB, leaInReg, true, -1);
break;
case X86::ADD16ri:
@ -2332,8 +2326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
switch (MIOpc) {
default: return nullptr;
case X86::INC64r:
case X86::INC32r:
case X86::INC64_32r: {
case X86::INC32r: {
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
@ -2354,7 +2347,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::INC16r:
case X86::INC64_16r:
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
: nullptr;
@ -2363,8 +2355,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addOperand(Dest).addOperand(Src), 1);
break;
case X86::DEC64r:
case X86::DEC32r:
case X86::DEC64_32r: {
case X86::DEC32r: {
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
@ -2387,7 +2378,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::DEC16r:
case X86::DEC64_16r:
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
: nullptr;
@ -3718,14 +3708,12 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
case X86::DEC64_32r: case X86::DEC64_16r:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
case X86::INC64_32r: case X86::INC64_16r:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@ -5476,14 +5464,10 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
return FuseKind == FuseCmp || FuseKind == FuseInc;
case X86::INC16r:
case X86::INC32r:
case X86::INC64_16r:
case X86::INC64_32r:
case X86::INC64r:
case X86::INC8r:
case X86::DEC16r:
case X86::DEC32r:
case X86::DEC64_16r:
case X86::DEC64_32r:
case X86::DEC64r:
case X86::DEC8r:
return FuseKind == FuseInc;

View File

@ -545,6 +545,24 @@ ReSimplify:
break;
}
case X86::DEC16r:
case X86::DEC32r:
case X86::INC16r:
case X86::INC32r:
// If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
if (!AsmPrinter.getSubtarget().is64Bit()) {
unsigned Opcode;
switch (OutMI.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
case X86::INC16r: Opcode = X86::INC16r_alt; break;
case X86::INC32r: Opcode = X86::INC32r_alt; break;
}
OutMI.setOpcode(Opcode);
}
break;
// These are pseudo-ops for OR to help with the OR->ADD transformation. We do
// this with an ugly goto in case the resultant OR uses EAX and needs the
// short form.