diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index cbe4ca4f538..477505e2f79 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -747,6 +747,13 @@ public: /// there are any bits set in the constant that are not demanded. If so, /// shrink the constant and return true. bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded); + + /// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the + /// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening + /// cast, but it could be generalized for targets with other types of + /// implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + DebugLoc dl); }; /// SimplifyDemandedBits - Look at Op. At this point, we know that only the @@ -1386,7 +1393,23 @@ public: virtual bool isTruncateFree(MVT VT1, MVT VT2) const { return false; } - + + /// isZExtFree - Return true if any actual instruction that defines a + /// value of type Ty1 implicit zero-extends the value to Ty2 in the result + /// register. This does not necessarily include registers defined in + /// unknown ways, such as incoming arguments, or copies from unknown + /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this + /// does not necessarily apply to truncate instructions. e.g. on x86-64, + /// all instructions that define 32-bit values implicit zero-extend the + /// result out to 64 bits. + virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const { + return false; + } + + virtual bool isZExtFree(MVT VT1, MVT VT2) const { + return false; + } + //===--------------------------------------------------------------------===// // Div utility functions // diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 75cd36dec2c..3f8714085ae 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -579,24 +579,41 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, iterator IP = begin(); for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) { + bool Done = false; unsigned Start = I->start, End = I->end; - IP = std::upper_bound(IP, end(), Start); - - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) continue; + // If a clobber range starts before an existing range and ends after + // it, the clobber range will need to be split into multiple ranges. + // Loop until the entire clobber range is handled. + while (!Done) { + Done = true; + IP = std::upper_bound(IP, end(), Start); + unsigned SubRangeStart = Start; + unsigned SubRangeEnd = End; + + // If the start of this range overlaps with an existing liverange, trim it. + if (IP != begin() && IP[-1].end > SubRangeStart) { + SubRangeStart = IP[-1].end; + // Trimmed away the whole range? + if (SubRangeStart >= SubRangeEnd) continue; + } + // If the end of this range overlaps with an existing liverange, trim it. + if (IP != end() && SubRangeEnd > IP->start) { + // If the clobber live range extends beyond the existing live range, + // it'll need at least another live range, so set the flag to keep + // iterating. + if (SubRangeEnd > IP->end) { + Start = IP->end; + Done = false; + } + SubRangeEnd = IP->start; + // If this trimmed away the whole range, ignore it. 
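+        // (If Done was cleared above, the while loop comes back around for
+        // the portion of the clobber range that extends past this liverange.)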
+ if (SubRangeStart == SubRangeEnd) continue; + } + + // Insert the clobber interval. + IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo), + IP); } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) continue; - } - - // Insert the clobber interval. - IP = addRangeFrom(LiveRange(Start, End, ClobberValNo), IP); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 8c7fa1b1ac2..cb08fe759f5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -399,6 +399,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; // Earlyclobbers move back one. @@ -556,6 +557,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); @@ -658,6 +660,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, @@ -855,7 +858,8 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg)) Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm()); return Reg; - } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG) + } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) return VNI->copy->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 78d5d403e86..e874f1b1ec1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1666,9 +1666,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| - N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) && + N0.getOpcode() == ISD::SIGN_EXTEND || + (N0.getOpcode() == ISD::TRUNCATE && + !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), 
@@ -3121,10 +3123,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); } - // fold (zext (and (trunc x), cst)) -> (and x, cst). + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && - N0.getOperand(1).getOpcode() == ISD::Constant) { + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); @@ -3252,10 +3258,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); } - // fold (aext (and (trunc x), cst)) -> (and x, cst). + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && - N0.getOperand(1).getOpcode() == ISD::Constant) { + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 79a48a6fd9c..7b83a127074 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -777,6 +777,48 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, return false; } +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. + SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + /// SimplifyDemandedBits - Look at Op. 
At this point, we know that only the /// DemandedMask bits of the result of Op are ever used downstream. If we can /// use this information to simplify Op, create a new simplified DAG node and @@ -865,7 +907,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the RHS is a constant, see if we can simplify it. if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; - + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. @@ -896,7 +941,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the RHS is a constant, see if we can simplify it. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. @@ -918,7 +966,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(0)); if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); - + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + // If all of the unknown bits are known to be zero on one side or the other // (but not both) turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 @@ -1333,6 +1384,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } #endif break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. 
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index f60b23a6c50..60f7f403a09 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1070,7 +1070,8 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, return true; } } - if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { SubIdx = MI->getOperand(3).getImm(); if (VirtReg == MI->getOperand(0).getReg()) { if (!tri_->getSubReg(PhysReg, SubIdx)) @@ -1164,11 +1165,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG; + bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG; unsigned SubIdx = 0; if (isExtSubReg) { DstReg = CopyMI->getOperand(0).getReg(); SrcReg = CopyMI->getOperand(1).getReg(); - } else if (isInsSubReg) { + } else if (isInsSubReg || isSubRegToReg) { if (CopyMI->getOperand(2).getSubReg()) { DOUT << "\tSource of insert_subreg is already coalesced " << "to another register.\n"; @@ -1212,7 +1214,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; unsigned RealSrcReg = 0; - if (isExtSubReg || isInsSubReg) { + if (isExtSubReg || isInsSubReg || isSubRegToReg) { SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm(); if (SrcIsPhys && isExtSubReg) { // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be @@ -1228,7 +1230,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else SrcReg = tri_->getSubReg(SrcReg, SubIdx); SubIdx = 0; - } else if (DstIsPhys && isInsSubReg) { + } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) { // EAX = INSERT_SUBREG EAX, r1024, 0 unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg(); if (SrcSubIdx) { @@ -1241,8 +1243,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else DstReg = tri_->getSubReg(DstReg, SubIdx); SubIdx = 0; - } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && isInsSubReg)) { - if (CopyMI->getOperand(1).getSubReg()) { + } else if ((DstIsPhys && isExtSubReg) || + (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { + if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { DOUT << "\tSrc of extract_subreg already coalesced with reg" << " of a super-class.\n"; return false; // Not coalescable. @@ -1295,20 +1298,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Process moves where one of the registers have a sub-register index. MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg); - if (DstMO->getSubReg()) - // FIXME: Can we handle this? - return false; MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg); - SubIdx = SrcMO->getSubReg(); + SubIdx = DstMO->getSubReg(); if (SubIdx) { - // This is not a extract_subreg but it looks like one. - // e.g. %cl = MOV16rr %reg1024:2 - isExtSubReg = true; - if (DstIsPhys) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg)) + if (SrcMO->getSubReg()) + // FIXME: can we handle this? + return false; + // This is not an insert_subreg but it looks like one. + // e.g. 
%reg1024:3 = MOV32rr %EAX + isInsSubReg = true; + if (SrcIsPhys) { + if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) return false; // Not coalescable SubIdx = 0; } + } else { + SubIdx = SrcMO->getSubReg(); + if (SubIdx) { + // This is not a extract_subreg but it looks like one. + // e.g. %cl = MOV16rr %reg1024:2 + isExtSubReg = true; + if (DstIsPhys) { + if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg)) + return false; // Not coalescable + SubIdx = 0; + } + } } const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); @@ -1393,7 +1408,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SavedLI = li_->dupInterval(&DstInt); // Check if it is necessary to propagate "isDead" property. - if (!isExtSubReg && !isInsSubReg) { + if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); bool isDead = mopd->isDead(); @@ -1446,12 +1461,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (!isExtSubReg && !isInsSubReg && + if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. - if (!isExtSubReg && !isInsSubReg && + if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); @@ -1505,8 +1520,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the // larger super-register. - if ((isExtSubReg || isInsSubReg) && !SrcIsPhys && !DstIsPhys) { - if ((isExtSubReg && !Swapped) || (isInsSubReg && Swapped)) { + if ((isExtSubReg || isInsSubReg || isSubRegToReg) && + !SrcIsPhys && !DstIsPhys) { + if ((isExtSubReg && !Swapped) || + ((isInsSubReg || isSubRegToReg) && Swapped)) { ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator()); std::swap(SrcReg, DstReg); std::swap(ResSrcInt, ResDstInt); @@ -1594,7 +1611,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. - if (ResDstInt->preference && (isExtSubReg || isInsSubReg) && + if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); if (!RC->contains(ResDstInt->preference)) @@ -1847,7 +1864,13 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ LHS.weight += RHS.weight; if (RHS.preference && !LHS.preference) LHS.preference = RHS.preference; - + + // Update the liveintervals of sub-registers. 
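+  // If LHS is a physical register, the ranges just merged in from RHS also
+  // clobber its sub-registers, so fold the merged interval into each
+  // sub-register's interval as clobber ranges.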
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) + for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) + li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS, + li_->getVNInfoAllocator()); + return true; } @@ -2183,7 +2206,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2498,7 +2522,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) && + MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e8ae988f0c1..8aa866ea29b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -177,7 +177,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, break; } - if (!KillMI || KillMI->getParent() != MBB) + if (!KillMI || KillMI->getParent() != MBB || KillMI == MI) return false; // If any of the definitions are used by another instruction between the @@ -326,6 +326,9 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); + } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); } } @@ -337,6 +340,46 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return false; } +/// isKilled - Test if the given register value, which is used by the given +/// instruction, is killed by the given instruction. This looks through +/// coalescable copies to see if the original value is potentially not killed. +/// +/// For example, in this code: +/// +/// %reg1034 = copy %reg1024 +/// %reg1035 = copy %reg1025 +/// %reg1036 = add %reg1034, %reg1035 +/// +/// %reg1034 is not considered to be killed, since it is copied from a +/// register which is not killed. Treating it as not killed lets the +/// normal heuristics commute the (two-address) add, which lets +/// coalescing eliminate the extra copy. +/// +static bool isKilled(MachineInstr &MI, unsigned Reg, + const MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + MachineInstr *DefMI = &MI; + for (;;) { + if (!DefMI->killsRegister(Reg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return true; + MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); + // If there are multiple defs, we can't do a simple analysis, so just + // go with what the kill flag says. 
+ if (next(Begin) != MRI->def_end()) + return true; + DefMI = &*Begin; + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + // If the def is something other than a copy, then it isn't going to + // be coalesced, so follow the kill flag. + if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return true; + Reg = SrcReg; + } +} + /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { @@ -735,7 +778,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // rearrange the code to make it so. Making it the killing user will // allow us to coalesce A and B together, eliminating the copy we are // about to insert. - if (!mi->killsRegister(regB)) { + if (!isKilled(*mi, regB, MRI, TII)) { // If regA is dead and the instruction can be deleted, just delete // it so it doesn't clobber regB. if (mi->getOperand(ti).isDead() && isSafeToDelete(mi, TII)) { @@ -753,7 +796,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { assert(mi->getOperand(3-si).isReg() && "Not a proper commutative instruction!"); unsigned regC = mi->getOperand(3-si).getReg(); - if (mi->killsRegister(regC)) { + if (isKilled(*mi, regC, MRI, TII)) { if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { ++NumCommuted; regB = regC; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c5a6acbf7af..6bdb92fbcb7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7258,6 +7258,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { return Subtarget->is64Bit() || NumBits1 < 64; } +bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { + // x86-64 has implicitly zero-extends 32-bit results in 64-bit registers. + return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit(); +} + +bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { + // x86-64 has implicitly zero-extends 32-bit results in 64-bit registers. + return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); +} + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ca4af634226..45b3e974ae4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -458,7 +458,18 @@ namespace llvm { /// register EAX to i16 by referencing its sub-register AX. virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; virtual bool isTruncateFree(MVT VT1, MVT VT2) const; - + + /// isZExtFree - Return true if any actual instruction that defines a + /// value of type Ty1 implicit zero-extends the value to Ty2 in the result + /// register. This does not necessarily include registers defined in + /// unknown ways, such as incoming arguments, or copies from unknown + /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this + /// does not necessarily apply to truncate instructions. e.g. on x86-64, + /// all instructions that define 32-bit values implicit zero-extend the + /// result out to 64 bits. 
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(MVT VT1, MVT VT2) const; + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index ce5a8a37032..f88834e4a28 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -292,10 +292,12 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; // There's no movzlq instruction, but movl can be used for this purpose, using -// implicit zero-extension. We need this because the seeming alternative for -// implementing zext from 32 to 64, an EXTRACT_SUBREG/SUBREG_TO_REG pair, isn't -// safe because both instructions could be optimized away in the -// register-to-register case, leaving nothing behind to do the zero extension. +// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero +// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit +// zero-extension, however this isn't possible when the 32-bit value is +// defined by a truncate or is copied from something where the high bits aren't +// necessarily all zero. In such cases, we fall back to these explicit zext +// instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", [(set GR64:$dst, (zext GR32:$src))]>; @@ -303,6 +305,21 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; +// Any instruction that defines a 32-bit result leaves the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may +// be copying from a truncate, but any other 32-bit operation will zero-extend +// up to 64 bits. +def def32 : PatLeaf<(i32 GR32:$src), [{ + return N->getOpcode() != ISD::TRUNCATE && + N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != ISD::CopyFromReg; +}]>; + +// In the case of a 32-bit def that is known to implicitly zero-extend, +// we can use a SUBREG_TO_REG. 
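+// The (i64 0) operand of SUBREG_TO_REG asserts that the upper 32 bits of the
+// result are already zero, so no instruction is emitted for the extension.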
+def : Pat<(i64 (zext def32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + let neverHasSideEffects = 1 in { let Defs = [RAX], Uses = [EAX] in def CDQE : RI<0x98, RawFrm, (outs), (ins), @@ -1443,10 +1460,6 @@ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS), def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS), (CMOVO64rm GR64:$src2, addr:$src1)>; -// Zero-extension -def : Pat<(i64 (zext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; - // zextload bool -> zextload byte def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; diff --git a/test/CodeGen/X86/2008-05-06-SpillerBug.ll b/test/CodeGen/X86/2008-05-06-SpillerBug.ll deleted file mode 100644 index e13f398062e..00000000000 --- a/test/CodeGen/X86/2008-05-06-SpillerBug.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep addb | grep ebp - - %struct.rc4_state = type { i32, i32, [256 x i32] } -@.str1 = internal constant [65 x i8] c"m[%d] = 0x%02x, m[%d] = 0x%02x, 0x%02x, k = %d, key[k] = 0x%02x\0A\00" ; <[65 x i8]*> [#uses=1] -@keys = internal constant [7 x [30 x i8]] [ [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\04\EF\01#E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\04\EF\01#E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] zeroinitializer ] ; <[7 x [30 x i8]]*> [#uses=1] - -declare i32 @printf(i8*, ...) 
nounwind - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: - br label %bb25 - -bb25: ; preds = %bb25, %entry - %foo = phi i1 [ 0, %bb25], [ 1, %entry] - br i1 %foo, label %bb.i, label %bb25 - -bb.i: ; preds = %bb.i, %bb25 - %foo2 = phi i1 [ 0, %bb.i], [1, %bb25] - br i1 %foo2, label %bb21.i, label %bb.i - -bb21.i: ; preds = %bb21.i, %bb.i - %k.0.reg2mem.0.i = phi i32 [ %k.1.i, %bb21.i ], [ 0, %bb.i ] ; [#uses=2] - %j.0.reg2mem.0.i = phi i8 [ %tmp35.i, %bb21.i ], [ 0, %bb.i ] ; [#uses=1] - %tmp25.i = load i32* null, align 4 ; [#uses=4] - %tmp2829.i = trunc i32 %tmp25.i to i8 ; [#uses=1] - %.sum = add i32 %k.0.reg2mem.0.i, 1 ; [#uses=3] - %tmp33.i = getelementptr [7 x [30 x i8]]* @keys, i32 0, i32 0, i32 %.sum ; [#uses=1] - %tmp34.i = load i8* %tmp33.i, align 1 ; [#uses=1] - %tmp30.i = add i8 %tmp2829.i, %j.0.reg2mem.0.i ; [#uses=1] - %tmp35.i = add i8 %tmp30.i, %tmp34.i ; [#uses=2] - %tmp3536.i = zext i8 %tmp35.i to i32 ; [#uses=2] - %tmp39.i = getelementptr %struct.rc4_state* null, i32 0, i32 2, i32 %tmp3536.i ; [#uses=1] - store i32 %tmp25.i, i32* %tmp39.i, align 4 - %tmp60.i = load i32* null, align 4 ; [#uses=1] - %tmp65.i = call i32 (i8*, ...)* @printf( i8* getelementptr ([65 x i8]* @.str1, i32 0, i32 0), i32 0, i32 %tmp60.i, i32 %tmp3536.i, i32 %tmp25.i, i32 %tmp25.i, i32 %k.0.reg2mem.0.i, i32 0 ) nounwind ; [#uses=0] - %tmp70.i = icmp slt i32 %.sum, 8 ; [#uses=1] - %k.1.i = select i1 %tmp70.i, i32 %.sum, i32 0 ; [#uses=1] - br label %bb21.i -} diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll index fcbc59b838a..6b64c6ce4da 100644 --- a/test/CodeGen/X86/byval7.ll +++ b/test/CodeGen/X86/byval7.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16 +; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16 %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index b47b856052d..b21586173f9 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,7 +1,7 @@ ; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t ; RUN: grep inc %t | count 2 ; RUN: grep addq %t | count 13 -; RUN: grep leaq %t | count 10 +; RUN: grep leaq %t | count 9 ; RUN: grep movq %t | count 5 ; IV users in each of the loops from other loops shouldn't cause LSR diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll index 4e487e18472..cf9f2d81425 100644 --- a/test/CodeGen/X86/subreg-to-reg-1.ll +++ b/test/CodeGen/X86/subreg-to-reg-1.ll @@ -1,6 +1,9 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep {movl %e.\*, %e.\*} | count 1 +; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal .*), %e.\*} | count 1 ; Don't eliminate or coalesce away the explicit zero-extension! +; This is currently using an leal because of a 3-addressification detail, +; though this isn't necessary; The point of this test is to make sure +; a 32-bit add is used. define i64 @foo(i64 %a) { %b = add i64 %a, 4294967295 diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll new file mode 100644 index 00000000000..6634538c2af --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-3.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep imull + +; Don't eliminate or coalesce away the explicit zero-extension! 
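+; Only the low 32 bits of the multiply are demanded by the 'and', so the
+; multiply itself should be narrowed to a 32-bit imull.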
+ +define i64 @foo(i64 %a) { + %b = mul i64 %a, 7823 + %c = and i64 %b, 4294967295 + %d = add i64 %c, 1 + ret i64 %d +} diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll new file mode 100644 index 00000000000..bb6af3988c9 --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-4.ll @@ -0,0 +1,135 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: not grep leaq %t +; RUN: not grep incq %t +; RUN: not grep decq %t +; RUN: not grep negq %t +; RUN: not grep addq %t +; RUN: not grep subq %t +; RUN: not grep {movl %} %t + +; Utilize implicit zero-extension on x86-64 to eliminate explicit +; zero-extensions. Shrink 64-bit adds to 32-bit when the high +; 32-bits will be zeroed. + +define void @bar(i64 %x, i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i64 %x, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @easy(i32 %x, i32 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i32 %x, %y + %tn = zext i32 %t0 to i64 + %t1 = and i64 %tn, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = add i64 %p, %y + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = add i64 %p, %y + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +entry: + %a = load i64* %x + %b = load i64* %y + %t0 = add i64 %a, %b + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @avo(i64 %x, i64* %z, i64 %u) nounwind readnone { +entry: + %t0 = add i64 %x, 734847 + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @phe(i64 %x, i64* %z, i64 %u) nounwind readnone { +entry: + %t0 = add i64 %x, 734847 + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @oze(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i64 %y, 1 + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} + +define void @sbar(i64 %x, i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i64 %x, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @seasy(i32 %x, i32 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i32 %x, %y + %tn = zext i32 %t0 to i64 + %t1 = and i64 %tn, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = sub i64 %p, %y + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = sub i64 %p, %y + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +entry: + %a = load i64* %x + %b = load i64* %y + %t0 = sub i64 %a, %b + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @swya(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i64 0, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @soze(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i64 %y, 1 + %t1 = and i64 %t0, 4294967295 + 
store i64 %t1, i64* %z + ret void +} diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll new file mode 100644 index 00000000000..eee751a87d5 --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-5.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep addl %t +; RUN: not egrep {movl|movq} %t + +define float @foo(float* %B) nounwind { +entry: + br label %bb2 + +bb2: ; preds = %bb3, %entry + %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; [#uses=2] + br i1 false, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + %indvar.next154 = add i64 %B_addr.0.rec, 1 ; [#uses=1] + br label %bb2 + +bb4: ; preds = %bb2 + %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; [#uses=1] + %t1 = ptrtoint float* %B_addr.0 to i64 ; [#uses=1] + %t2 = and i64 %t1, 15 ; [#uses=1] + %t3 = icmp eq i64 %t2, 0 ; [#uses=1] + br i1 %t3, label %bb5, label %bb10.preheader + +bb10.preheader: ; preds = %bb4 + br label %bb9 + +bb5: ; preds = %bb4 + unreachable + +bb9: ; preds = %bb10.preheader + %t5 = getelementptr float* %B, i64 0 ; [#uses=1] + %t7 = load float* %t5 ; [#uses=1] + ret float %t7 +} diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll new file mode 100644 index 00000000000..f18eef7d197 --- /dev/null +++ b/test/CodeGen/X86/subreg-to-reg-6.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as < %s | llc -march=x86-64 + +define i64 @foo() nounwind { +entry: + %t0 = load i32* null, align 8 + switch i32 %t0, label %bb65 [ + i32 16, label %bb + i32 12, label %bb56 + ] + +bb: + br label %bb65 + +bb56: + unreachable + +bb65: + %a = phi i64 [ 0, %bb ], [ 0, %entry ] + tail call void asm "", "{cx}"(i64 %a) nounwind + %t15 = and i64 %a, 4294967295 + ret i64 %t15 +} + +define i64 @bar(i64 %t0) nounwind { + call void asm "", "{cx}"(i64 0) nounwind + %t1 = sub i64 0, %t0 + %t2 = and i64 %t1, 4294967295 + ret i64 %t2 +} diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index ba98ae8252e..5293b778796 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -1,5 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 -stats |& \ -; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted} +; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5 ; rdar://6523745 @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
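
As a side note, not part of the patch itself: the new ISD::ADD/SUB/MUL handling
in SimplifyDemandedBits and the ShrinkDemandedOp transformation rely on the
fact that, for add, subtract, and multiply, the low N bits of the result depend
only on the low N bits of the operands. A minimal standalone C++ check of that
property for the 32-in-64 case exercised by the X86 tests above:

#include <cassert>
#include <cstdint>

int main() {
  // Arbitrary operands with nonzero high halves.
  uint64_t x = 0x123456789abcdef0ULL, y = 0x0fedcba987654321ULL;
  uint32_t xl = static_cast<uint32_t>(x), yl = static_cast<uint32_t>(y);
  // Doing the arithmetic at 32 bits and zero-extending produces the same
  // low 32 bits as doing it at 64 bits, for add, sub, and mul.
  assert(uint64_t(uint32_t(xl + yl)) == ((x + y) & 0xffffffffULL));
  assert(uint64_t(uint32_t(xl - yl)) == ((x - y) & 0xffffffffULL));
  assert(uint64_t(uint32_t(xl * yl)) == ((x * y) & 0xffffffffULL));
  return 0;
}

A target opts into the narrowing simply by returning true from isTruncateFree
and isZExtFree for the relevant pair of types, as the X86 overrides in this
patch do for i64 and i32.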