diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 2cfa719d73b..42844562020 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -997,7 +997,7 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { return false; // First issue a copy to GR16_ or GR32_. - unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_; + unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_RegisterClass : X86::GR32_RegisterClass; unsigned CopyReg = createResultReg(CopyRC); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6fd9d00e661..41a3c416f85 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1019,21 +1019,69 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, break; case ISD::AND: { - // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this - // allows us to fold the shift into this addressing mode. + // Perform some heroic transforms on an and of a constant-count shift + // with a constant to enable use of the scaled offset field. + SDValue Shift = N.getOperand(0); - if (Shift.getOpcode() != ISD::SHL) break; + if (Shift.getNumOperands() != 2) break; // Scale must not be used already. if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; // Not when RIP is used as the base. if (AM.isRIPRel) break; - + + SDValue X = Shift.getOperand(0); ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); if (!C1 || !C2) break; + // Handle "(X >> (8-S)) & (0xff << S)" as "((X >> 8) & 0xff) << S" if safe. + // This turns the shift and mask into an h-register extract and a scaled + // index. S is limited to 1..3: x86 only encodes scales 1, 2, 4 and 8. 
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { + unsigned ScaleLog = 8 - C1->getZExtValue(); + if (ScaleLog > 0 && ScaleLog < 4 && + C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { + SDValue Eight = CurDAG->getConstant(8, MVT::i8); + SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); + SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), + X, Eight); + SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), + Srl, Mask); + + // Insert the new nodes into the topological ordering. + if (Eight.getNode()->getNodeId() == -1 || + Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { + CurDAG->RepositionNode(X.getNode(), Eight.getNode()); + Eight.getNode()->setNodeId(X.getNode()->getNodeId()); + } + if (Mask.getNode()->getNodeId() == -1 || + Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { + CurDAG->RepositionNode(X.getNode(), Mask.getNode()); + Mask.getNode()->setNodeId(X.getNode()->getNodeId()); + } + if (Srl.getNode()->getNodeId() == -1 || + Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { + CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); + Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); + } + if (And.getNode()->getNodeId() == -1 || + And.getNode()->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), And.getNode()); + And.getNode()->setNodeId(N.getNode()->getNodeId()); + } + CurDAG->ReplaceAllUsesWith(N, And); + AM.IndexReg = And; + AM.Scale = (1 << ScaleLog); + return false; + } + } + + // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this + // allows us to fold the shift into this addressing mode. + if (Shift.getOpcode() != ISD::SHL) break; + // Not likely to be profitable if either the AND or SHIFT node has more // than one use (unless all uses are for address computation). Besides, // isel mechanism requires their node ids to be reused. 
@@ -1046,7 +1094,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, break; // Get the new AND mask, this folds to a constant. - SDValue X = Shift.getOperand(0); SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), SDValue(C2, 0), SDValue(C1, 0)); SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 10e66e88bee..05bccabc304 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1522,7 +1522,7 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>; + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; @@ -1531,7 +1531,7 @@ def : Pat<(and GR64:$src, 0xff), (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>, + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>, Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), @@ -1540,13 +1540,13 @@ def : Pat<(and GR16:$src1, 0xff), // sext_inreg patterns def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>; + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), - (MOVSX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), - (MOVSX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>; + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>; def : Pat<(sext_inreg GR32:$src, i8), - 
(MOVSX32rr8 (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)))>, + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>, @@ -1554,16 +1554,63 @@ def : Pat<(sext_inreg GR16:$src, i8), // trunc patterns def : Pat<(i32 (trunc GR64:$src)), - (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>; def : Pat<(i16 (trunc GR64:$src)), - (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; + (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>; def : Pat<(i8 (trunc GR64:$src)), - (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>; + (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>; def : Pat<(i8 (trunc GR32:$src)), - (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>, + (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit))>, + (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>, + Requires<[In64BitMode]>; + +// h-register tricks. +// For now, be conservative and only do the extract if the value is immediately +// zero-extended or stored, which are somewhat common cases. This uses a bunch +// of code to prevent a register requiring a REX prefix from being allocated in +// the same instruction as the h register, as there's currently no way to +// describe this requirement to the register allocator. + +// h-register extract and zero-extend. 
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_), + x86_subreg_8bit_hi)), + x86_subreg_32bit)>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl_su GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit_hi)), + x86_subreg_16bit)>, + Requires<[In64BitMode]>; + +// h-register extract and store. +def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_), + x86_subreg_8bit_hi))>; +def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 77320587cb4..77955a6a426 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -258,10 +258,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::JMP64r, X86::JMP64m, 1 }, { X86::MOV16ri, X86::MOV16mi, 0 }, { X86::MOV16rr, X86::MOV16mr, 0 }, - { X86::MOV16to16_, X86::MOV16_mr, 0 }, { X86::MOV32ri, X86::MOV32mi, 0 }, { X86::MOV32rr, X86::MOV32mr, 0 }, - { X86::MOV32to32_, X86::MOV32_mr, 0 }, { X86::MOV64ri32, X86::MOV64mi32, 0 }, { X86::MOV64rr, X86::MOV64mr, 0 }, { X86::MOV8ri, X86::MOV8mi, 0 }, @@ -372,9 +370,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, { 
X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, { X86::MOV16rr, X86::MOV16rm }, - { X86::MOV16to16_, X86::MOV16_rm }, { X86::MOV32rr, X86::MOV32rm }, - { X86::MOV32to32_, X86::MOV32_rm }, { X86::MOV64rr, X86::MOV64rm }, { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, { X86::MOV64toSDrr, X86::MOV64toSDrm }, @@ -404,6 +400,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, { X86::MOVZX16rr8, X86::MOVZX16rm8 }, { X86::MOVZX32rr16, X86::MOVZX32rm16 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 }, { X86::MOVZX32rr8, X86::MOVZX32rm8 }, { X86::MOVZX64rr16, X86::MOVZX64rm16 }, { X86::MOVZX64rr32, X86::MOVZX64rm32 }, @@ -672,8 +669,6 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOV16to16_: - case X86::MOV32to32_: case X86::MOVSSrr: case X86::MOVSDrr: @@ -710,9 +705,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, default: break; case X86::MOV8rm: case X86::MOV16rm: - case X86::MOV16_rm: case X86::MOV32rm: - case X86::MOV32_rm: case X86::MOV64rm: case X86::LD_Fp64m: case X86::MOVSSrm: @@ -741,9 +734,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, default: break; case X86::MOV8mr: case X86::MOV16mr: - case X86::MOV16_mr: case X86::MOV32mr: - case X86::MOV32_mr: case X86::MOV64mr: case X86::ST_FpP64m: case X86::MOVSSmr: @@ -795,9 +786,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { default: break; case X86::MOV8rm: case X86::MOV16rm: - case X86::MOV16_rm: case X86::MOV32rm: - case X86::MOV32_rm: case X86::MOV64rm: case X86::LD_Fp64m: case X86::MOVSSrm: @@ -1670,10 +1659,22 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = X86::MOV16rr; } else if (DestRC == &X86::GR8RegClass) { Opc = X86::MOV8rr; + } else if (DestRC == &X86::GR64_RegClass) { + Opc = X86::MOV64rr; } else if (DestRC == &X86::GR32_RegClass) { - Opc = X86::MOV32_rr; + Opc = X86::MOV32rr; } else 
if (DestRC == &X86::GR16_RegClass) { - Opc = X86::MOV16_rr; + Opc = X86::MOV16rr; + } else if (DestRC == &X86::GR8_RegClass) { + Opc = X86::MOV8rr; + } else if (DestRC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64rr; + } else if (DestRC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rr; + } else if (DestRC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rr; + } else if (DestRC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8rr; } else if (DestRC == &X86::RFP32RegClass) { Opc = X86::MOV_Fp3232; } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) { @@ -1721,7 +1722,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, return true; } } - + // Moving from ST(0) turns into FpGET_ST0_32 etc. if (SrcRC == &X86::RSTRegClass) { // Copying from ST(0)/ST(1). @@ -1779,10 +1780,22 @@ static unsigned getStoreRegOpcode(const TargetRegisterClass *RC, Opc = X86::MOV16mr; } else if (RC == &X86::GR8RegClass) { Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_RegClass) { + Opc = X86::MOV64mr; } else if (RC == &X86::GR32_RegClass) { - Opc = X86::MOV32_mr; + Opc = X86::MOV32mr; } else if (RC == &X86::GR16_RegClass) { - Opc = X86::MOV16_mr; + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_RegClass) { + Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8mr; } else if (RC == &X86::RFP80RegClass) { Opc = X86::ST_FpP80m; // pops } else if (RC == &X86::RFP64RegClass) { @@ -1847,10 +1860,22 @@ static unsigned getLoadRegOpcode(const TargetRegisterClass *RC, Opc = X86::MOV16rm; } else if (RC == &X86::GR8RegClass) { Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_RegClass) { + Opc = X86::MOV64rm; } else if (RC == &X86::GR32_RegClass) { - Opc = X86::MOV32_rm; + Opc = X86::MOV32rm; } else if (RC == &X86::GR16_RegClass) { - Opc = 
X86::MOV16_rm; + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_RegClass) { + Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8rm; } else if (RC == &X86::RFP80RegClass) { Opc = X86::LD_Fp80m; } else if (RC == &X86::RFP64RegClass) { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index bef6b72c801..830796e3a38 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -181,6 +181,13 @@ def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; +// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of +// plain GR64, so that it doesn't potentially require a REX prefix. +def i8mem_NOREX : Operand<i64> { + let PrintMethod = "printi8mem"; + let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm); +} + def lea32mem : Operand<i32> { let PrintMethod = "printlea32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); @@ -398,6 +405,14 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ return N->hasOneUse(); }]>; +// An 'srl' node with a single use. +def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; +// A 'trunc' node with a single use. +def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ + return N->hasOneUse(); +}]>; // 'shld' and 'shrd' instruction patterns. 
Note that even though these have // the srl and shl in their patterns, the C++ code must still check for them, @@ -767,7 +782,12 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store GR32:$src, addr:$dst)]>; - + +// A version of MOV8mr that uses i8mem_NOREX so that it can be used for +// storing h registers, which can't be encoded when a REX prefix is present. +def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8:$src), + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; + //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... // @@ -2899,6 +2919,18 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 +// except that they use GR32_NOREX for the output operand register class +// instead of GR32. This allows them to operate on h registers on x86-64. +def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, + (outs GR32_NOREX:$dst), (ins GR8:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", + []>, TB; +def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, + (outs GR32_NOREX:$dst), (ins i8mem:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", + []>, TB; + let neverHasSideEffects = 1 in { let Defs = [AX], Uses = [AL] in def CBW : I<0x98, RawFrm, (outs), (ins), @@ -2935,33 +2967,6 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), [(set GR32:$dst, 0)]>; } -// Basic operations on GR16 / GR32 subclasses GR16_ and GR32_ which contains only -// those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX). 
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1 in { -def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; - -def MOV16_rr : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16_:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32_rr : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32_:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -} // neverHasSideEffects - -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { -def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -} -let mayStore = 1, neverHasSideEffects = 1 in { -def MOV16_mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16_:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32_mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32_:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -} - //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // @@ -3341,38 +3346,61 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>; + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1), - x86_subreg_8bit)))>, + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src1, GR32_), + x86_subreg_8bit))>, Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1), - x86_subreg_8bit)))>, + (MOVZX16rr8 
(EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src1, GR16_), + x86_subreg_8bit))>, Requires<[In32BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>; + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), - x86_subreg_8bit)))>, + (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + x86_subreg_8bit))>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), - x86_subreg_8bit)))>, + (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit))>, Requires<[In32BitMode]>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), - (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; + (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; def : Pat<(i8 (trunc GR32:$src)), - (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>, + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + x86_subreg_8bit)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>, + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit)>, + Requires<[In32BitMode]>; + +// h-register tricks +def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + x86_subreg_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(srl_su GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32rr8 + (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_), + x86_subreg_8bit_hi)), + x86_subreg_16bit)>, + Requires<[In32BitMode]>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_), + 
x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 4856e2346de..33b9f5edc73 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -35,7 +35,7 @@ namespace X86 { /// these indices must be kept in sync with the class indices in the /// X86RegisterInfo.td file. enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_16BIT = 2, SUBREG_32BIT = 3 + SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4 }; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index a7b0f88963b..b323e78cfab 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -49,7 +49,8 @@ let Namespace = "X86" in { def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>; def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>; - // High registers X86-32 only + // High registers. On x86-64, these cannot be used in any instruction + // with a REX prefix. def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>; def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>; def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>; @@ -185,41 +186,45 @@ let Namespace = "X86" in { // def x86_subreg_8bit : PatLeaf<(i32 1)>; -def x86_subreg_16bit : PatLeaf<(i32 2)>; -def x86_subreg_32bit : PatLeaf<(i32 3)>; +def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; +def x86_subreg_16bit : PatLeaf<(i32 3)>; +def x86_subreg_32bit : PatLeaf<(i32 4)>; def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; -// It's unclear if this subreg set is safe, given that not all registers -// in the class have an 'H' subreg. 
-// def : SubRegSet<2, [AX, CX, DX, BX], -// [AH, CH, DH, BH]>; +def : SubRegSet<2, [AX, CX, DX, BX], + [AH, CH, DH, BH]>; def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; -def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, +def : SubRegSet<2, [EAX, ECX, EDX, EBX], + [AH, CH, DH, BH]>; + +def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; -def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, +def : SubRegSet<2, [RAX, RCX, RDX, RBX], + [AH, CH, DH, BH]>; + +def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15], [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, + +def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15], [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; @@ -236,7 +241,11 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, // R8B, ... R15B. // Allocate R12 and R13 last, as these require an extra byte when // encoded in x86_64 instructions. -// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions, +// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in +// 64-bit mode. The main complication is that they cannot be encoded in an +// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc. +// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" +// cannot be encoded. 
def GR8 : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL, R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> { @@ -295,7 +304,7 @@ def GR8 : RegisterClass<"X86", [i8], 8, def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { - let SubRegClassList = [GR8]; + let SubRegClassList = [GR8, GR8]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -363,7 +372,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { - let SubRegClassList = [GR8, GR16]; + let SubRegClassList = [GR8, GR8, GR16]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -431,7 +440,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP]> { - let SubRegClassList = [GR8, GR16, GR32]; + let SubRegClassList = [GR8, GR8, GR16, GR32]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -452,13 +461,118 @@ def GR64 : RegisterClass<"X86", [i64], 64, } -// GR16, GR32 subclasses which contain registers that have GR8 sub-registers. -// These should only be used for 32-bit mode. +// GR8_, GR16_, GR32_, GR64_ - Subclasses of GR8, GR16, GR32, and GR64 +// which contain just the "a", "b", "c", and "d" registers. On x86-32, +// GR16_ and GR32_ are classes for registers that support 8-bit subreg +// operations. On x86-64, GR16_, GR32_, and GR64_ are classes for registers +// that support 8-bit h-register operations. 
+def GR8_ : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { +} def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { - let SubRegClassList = [GR8]; + let SubRegClassList = [GR8_, GR8_]; } def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { - let SubRegClassList = [GR8, GR16]; + let SubRegClassList = [GR8_, GR8_, GR16_]; +} +def GR64_ : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { + let SubRegClassList = [GR8_, GR8_, GR16_, GR32_]; +} + +// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of +// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs. +// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes +// of registers which do not by themselves require a REX prefix. +def GR8_NOREX : RegisterClass<"X86", [i8], 8, + [AL, CL, DL, SIL, DIL, BL, BPL, SPL]> { +} +def GR16_NOREX : RegisterClass<"X86", [i16], 16, + [AX, CX, DX, SI, DI, BX, BP, SP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX]; +} +// GR32_NOREX - GR32 registers which do not require a REX prefix. +def GR32_NOREX : RegisterClass<"X86", [i32], 32, + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate ESP or EBP. + static const unsigned X86_GR32_NOREX_AO_fp[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX + }; + // If not, just don't allocate ESP. 
+ static const unsigned X86_GR32_NOREX_AO[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP + }; + + GR32_NOREXClass::iterator + GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR32_NOREX_AO_fp; + else + return X86_GR32_NOREX_AO; + } + + GR32_NOREXClass::iterator + GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR32_NOREX_AO_fp + + (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned)); + else + return X86_GR32_NOREX_AO + + (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned)); + } + }]; +} + +// GR64_NOREX - GR64 registers which do not require a REX prefix. +def GR64_NOREX : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate RSP or RBP. + static const unsigned X86_GR64_NOREX_AO_fp[] = { + X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX + }; + // If not, just don't allocate RSP. 
+ static const unsigned X86_GR64_NOREX_AO[] = { + X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP + }; + + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR64_NOREX_AO_fp; + else + return X86_GR64_NOREX_AO; + } + + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR64_NOREX_AO_fp + + (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned)); + else + return X86_GR64_NOREX_AO + + (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned)); + } + }]; } // A class to support the 'A' assembler constraint: EAX then EDX. diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll new file mode 100644 index 00000000000..41d91285ddb --- /dev/null +++ b/test/CodeGen/X86/h-register-addressing-32.ll @@ -0,0 +1,53 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl %\[abcd\]h,} | count 7 + +; Use h-register extract and zero-extend. 
+ +define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr double* %p, i32 %t1 + %t3 = load double* %t2, align 8 + ret double %t3 +} +define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr float* %p, i32 %t1 + %t3 = load float* %t2, align 8 + ret float %t3 +} +define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr i16* %p, i32 %t1 + %t3 = load i16* %t2, align 8 + ret i16 %t3 +} +define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 5 + %t1 = and i32 %t0, 2040 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 6 + %t1 = and i32 %t0, 1020 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 7 + %t1 = and i32 %t0, 510 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll new file mode 100644 index 00000000000..b38e0e478e9 --- /dev/null +++ b/test/CodeGen/X86/h-register-addressing-64.ll @@ -0,0 +1,53 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7 + +; Use h-register extract and zero-extend. 
+ +define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr double* %p, i64 %t1 + %t3 = load double* %t2, align 8 + ret double %t3 +} +define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr float* %p, i64 %t1 + %t3 = load float* %t2, align 8 + ret float %t3 +} +define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr i16* %p, i64 %t1 + %t3 = load i16* %t2, align 8 + ret i16 %t3 +} +define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 5 + %t1 = and i64 %t0, 2040 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 6 + %t1 = and i64 %t0, 1020 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 7 + %t1 = and i64 %t0, 510 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll new file mode 100644 index 00000000000..e8672422a7b --- /dev/null +++ b/test/CodeGen/X86/h-register-store.ll @@ -0,0 +1,27 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep mov %t | count 6 +; RUN: grep {movb %ah, (%rsi)} %t | count 3 +; RUN: llvm-as < %s | llc -march=x86 > %t +; RUN: grep mov %t | count 3 +; RUN: grep {movb %ah, (%e} %t | count 3 + +; Use h-register extract and store. 
+ +define void @foo16(i16 inreg %p, i8* inreg %z) nounwind { + %q = lshr i16 %p, 8 + %t = trunc i16 %q to i8 + store i8 %t, i8* %z + ret void +} +define void @foo32(i32 inreg %p, i8* inreg %z) nounwind { + %q = lshr i32 %p, 8 + %t = trunc i32 %q to i8 + store i8 %t, i8* %z + ret void +} +define void @foo64(i64 inreg %p, i8* inreg %z) nounwind { + %q = lshr i64 %p, 8 + %t = trunc i64 %q to i8 + store i8 %t, i8* %z + ret void +} diff --git a/test/CodeGen/X86/h-registers.ll b/test/CodeGen/X86/h-registers.ll new file mode 100644 index 00000000000..2777be9cc3e --- /dev/null +++ b/test/CodeGen/X86/h-registers.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4 +; RUN: llvm-as < %s | llc -march=x86 > %t +; RUN: grep {incb %ah} %t | count 3 +; RUN: grep {movzbl %ah,} %t | count 3 + +; Use h registers. On x86-64, codegen doesn't support general allocation +; of h registers yet, due to x86 encoding complications. + +define void @bar64(i64 inreg %x, i8* inreg %p) nounwind { + %t0 = lshr i64 %x, 8 + %t1 = trunc i64 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define void @bar32(i32 inreg %x, i8* inreg %p) nounwind { + %t0 = lshr i32 %x, 8 + %t1 = trunc i32 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define void @bar16(i16 inreg %x, i8* inreg %p) nounwind { + %t0 = lshr i16 %x, 8 + %t1 = trunc i16 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define i64 @qux64(i64 inreg %x) nounwind { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + ret i64 %t1 +} + +define i32 @qux32(i32 inreg %x) nounwind { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + ret i32 %t1 +} + +define i16 @qux16(i16 inreg %x) nounwind { + %t0 = lshr i16 %x, 8 + ret i16 %t0 +} diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll index 3a84bad94d0..01f1397830a 100644 --- a/test/CodeGen/X86/inline-asm-out-regs.ll +++ 
b/test/CodeGen/X86/inline-asm-out-regs.ll @@ -1,6 +1,4 @@ ; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu -; XFAIL: * -; Expected to run out of registers during allocation. ; PR3391 @pci_indirect = external global { } ; <{ }*> [#uses=1]