diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5234da71a8d..f50f9b5a33c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -32,6 +32,7 @@ #define DEBUG_TYPE "ctrloops" #include "PPC.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -82,13 +83,14 @@ namespace { /// getCanonicalInductionVariable - Check to see if the loop has a canonical /// induction variable. /// Should be defined in MachineLoop. Based upon version in class Loop. - MachineInstr *getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const; + void getCanonicalInductionVariable(MachineLoop *L, + SmallVector &IVars, + SmallVector &IOps) const; /// getTripCount - Return a loop-invariant LLVM register indicating the /// number of times the loop will be executed. If the trip-count cannot /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L, bool &WordCmp, + CountValue *getTripCount(MachineLoop *L, SmallVector &OldInsts) const; /// isInductionOperation - Return true if the instruction matches the @@ -175,12 +177,12 @@ namespace { /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. 
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &WordCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPLWI) { - WordCmp = true; +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { + if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; return true; - } else if (MI->getOpcode() == PPC::CMPDI || MI->getOpcode() == PPC::CMPLDI) { - WordCmp = false; + } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + SignedCmp = false; return true; } @@ -227,26 +229,27 @@ bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { /// the machine. /// This method assumes that the IndVarSimplify pass has been run by 'opt'. /// -MachineInstr -*PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const { +void +PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, + SmallVector &IVars, + SmallVector &IOps) const { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return 0; // dead loop + if (PI == TopMBB->pred_end()) return; // dead loop MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + if (PI != TopMBB->pred_end()) return; // multiple backedges? // make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) - return 0; + return; std::swap(Incoming, Backedge); } else if (!L->contains(Backedge)) - return 0; + return; // Loop over all of the PHI nodes, looking for a canonical induction variable: // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". @@ -263,13 +266,13 @@ MachineInstr // Check if the definition is an induction operation. 
MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); if (isInductionOperation(DI, DefReg)) { - IOp = DI; - return MPhi; + IOps.push_back(DI); + IVars.push_back(MPhi); } } } } - return 0; + return; } /// getTripCount - Return a loop-invariant LLVM value indicating the @@ -283,66 +286,100 @@ MachineInstr /// /// Based upon getTripCount in LoopInfo. /// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, +CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, SmallVector &OldInsts) const { + MachineBasicBlock *LastMBB = L->getExitingBlock(); + // Don't generate a CTR loop if the loop has more than one exit. + if (LastMBB == 0) + return 0; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI->getOpcode() != PPC::BCC) + return 0; + + // We need to make sure that this compare is defining the condition + // register actually used by the terminating branch. + + unsigned PredReg = LastI->getOperand(1).getReg(); + DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); + + unsigned PredCond = LastI->getOperand(0).getImm(); + if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) + return 0; + // Check that the loop has a induction variable. 
- MachineInstr *IOp; - MachineInstr *IV_Inst = getCanonicalInductionVariable(L, IOp); - if (IV_Inst == 0) return 0; + SmallVector IVars, IOps; + getCanonicalInductionVariable(L, IVars, IOps); + for (unsigned i = 0; i < IVars.size(); ++i) { + MachineInstr *IOp = IOps[i]; + MachineInstr *IV_Inst = IVars[i]; - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } + // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', + // if Imm is 0, get the count from the PHI opnd + // if Imm is -M, then M is the count + // Otherwise, Imm is the count + MachineOperand *IV_Opnd; + const MachineOperand *InitialValue; + if (!L->contains(IV_Inst->getOperand(2).getMBB())) { + InitialValue = &IV_Inst->getOperand(1); + IV_Opnd = &IV_Inst->getOperand(3); + } else { + InitialValue = &IV_Inst->getOperand(3); + IV_Opnd = &IV_Inst->getOperand(1); + } - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, WordCmp)) { - OldInsts.push_back(MI); - OldInsts.push_back(IOp); + DEBUG(dbgs() << "Considering:\n"); + DEBUG(dbgs() << " induction operation: " << *IOp); + DEBUG(dbgs() << " induction variable: " << *IV_Inst); + DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); + + // Look for the cmp instruction to determine if we + // can get a useful trip count.
The trip count can + // be either a register or an immediate. The location + // of the value depends upon the type (reg or imm). + while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + bool SignedCmp; + MachineInstr *MI = IV_Opnd->getParent(); + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + MI->getOperand(0).getReg() == PredReg) { - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - int64_t ImmVal = MO.getImm(); + OldInsts.push_back(MI); + OldInsts.push_back(IOp); + + DEBUG(dbgs() << " compare: " << *MI); + + const MachineOperand &MO = MI->getOperand(2); + assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); - - if (ImmVal == 0) { - // Make sure the induction variable changes by one on each iteration. - if (iv_value != 1 && iv_value != -1) { - return 0; - } - return new CountValue(InitialValue->getReg(), iv_value > 0); - } else { + int64_t ImmVal; + if (SignedCmp) + ImmVal = (short) MO.getImm(); + else + ImmVal = MO.getImm(); + + const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); + assert(L->contains(IV_DefInstr->getParent()) && + "IV definition should occurs in loop"); + int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); + assert(InitialValue->isReg() && "Expecting register for init value"); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); - + unsigned InitialValueReg = InitialValue->getReg(); + + const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + // Here we need to look for an immediate load (an li or lis/ori pair). 
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = DefInstr->getOperand(2).getImm(); + int64_t start = (short) DefInstr->getOperand(2).getImm(); const MachineInstr *DefInstr2 = MRI->getVRegDef(DefInstr->getOperand(0).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { - start |= DefInstr2->getOperand(1).getImm() << 16; + DEBUG(dbgs() << " initial constant: " << *DefInstr); + DEBUG(dbgs() << " initial constant: " << *DefInstr2); + start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; + int64_t count = ImmVal - start; if ((count % iv_value) != 0) { return 0; @@ -351,12 +388,23 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { - int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); + DEBUG(dbgs() << " initial constant: " << *DefInstr); + + int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } return new CountValue(count/iv_value); + } else if (iv_value == 1 || iv_value == -1) { + // We can't determine a constant starting value. + if (ImmVal == 0) { + return new CountValue(InitialValueReg, iv_value > 0); + } + // FIXME: handle non-zero end value. } + // FIXME: handle non-unit increments (we might not want to introduce division + // but we can handle some 2^n cases with shifts). + } } } @@ -524,10 +572,9 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { return Changed; } - bool WordCmp; SmallVector OldInsts; // Are we able to determine the trip count for the loop? 
- CountValue *TripCount = getTripCount(L, WordCmp, OldInsts); + CountValue *TripCount = getTripCount(L, OldInsts); if (TripCount == 0) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; @@ -575,14 +622,21 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget(); bool isPPC64 = Subtarget.isPPC64(); + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; + unsigned CountReg; if (TripCount->isReg()) { // Create a copy of the loop count register. - const TargetRegisterClass *RC = + const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); + unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + (unsigned) PPC::EXTSW_32_64 : + (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, - TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); + TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); if (TripCount->isNeg()) { unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); @@ -590,26 +644,12 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), CountReg).addReg(CountReg1); } - - // On a 64-bit system, if the original comparison was only 32-bit, then - // mask out the higher-order part of the count. - if (isPPC64 && WordCmp) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(PPC::RLDICL), CountReg).addReg(CountReg1 - ).addImm(0).addImm(32); - } } else { assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); // Put the trip count in a register for transfer into the count register. 
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; int64_t CountImm = TripCount->getImm(); - if (TripCount->isNeg()) - CountImm = -CountImm; + assert(!TripCount->isNeg() && "Constant trip count must be positive"); CountReg = MF->getRegInfo().createVirtualRegister(RC); if (CountImm > 0xFFFF) { @@ -665,6 +705,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); // Conditional branch; just delete it. + DEBUG(dbgs() << "Removing old branch: " << *LastI); LastMBB->erase(LastI); delete TripCount; diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll new file mode 100644 index 00000000000..dcea06f29e7 --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-s000.ll @@ -0,0 +1,156 @@ +; ModuleID = 'tsc_s000.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +@Y = common global [16000 x double] zeroinitializer, align 32 +@X = common global [16000 x double] zeroinitializer, align 32 +@Z = common global [16000 x double] zeroinitializer, align 32 +@U = common global [16000 x double] zeroinitializer, align 32 +@V = common global [16000 x double] zeroinitializer, align 32 +@aa = common global [256 x [256 x double]] zeroinitializer, align 32 +@bb = common global [256 x [256 x double]] zeroinitializer, align 32 +@cc = common global [256 x [256 x double]] zeroinitializer, align 32 +@array = common global [65536 x double] zeroinitializer, align 32 +@x = common global [16000 x double] zeroinitializer, align 32 +@temp = common global double 0.000000e+00, align 8 +@temp_int = common global i32 0, align 4 +@a = common global [16000 x double] zeroinitializer, align 32 +@b = common global [16000 x double] zeroinitializer, 
align 32 +@c = common global [16000 x double] zeroinitializer, align 32 +@d = common global [16000 x double] zeroinitializer, align 32 +@e = common global [16000 x double] zeroinitializer, align 32 +@tt = common global [256 x [256 x double]] zeroinitializer, align 32 +@indx = common global [16000 x i32] zeroinitializer, align 32 +@xx = common global double* null, align 8 +@yy = common global double* null, align 8 + +define i32 @s000() nounwind { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end, %entry + %nl.010 = phi i32 [ 0, %entry ], [ %inc7, %for.end ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ] + %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv + %0 = load double* %arrayidx, align 32, !tbaa !0 + %add = fadd double %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv + store double %add, double* %arrayidx5, align 32, !tbaa !0 + %indvars.iv.next11 = or i64 %indvars.iv, 1 + %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11 + %1 = load double* %arrayidx.1, align 8, !tbaa !0 + %add.1 = fadd double %1, 1.000000e+00 + %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11 + store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0 + %indvars.iv.next.112 = or i64 %indvars.iv, 2 + %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112 + %2 = load double* %arrayidx.2, align 16, !tbaa !0 + %add.2 = fadd double %2, 1.000000e+00 + %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112 + store double %add.2, double* %arrayidx5.2, align 16, !tbaa !0 + %indvars.iv.next.213 = or i64 %indvars.iv, 3 + %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213 + %3 = 
load double* %arrayidx.3, align 8, !tbaa !0 + %add.3 = fadd double %3, 1.000000e+00 + %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213 + store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0 + %indvars.iv.next.314 = or i64 %indvars.iv, 4 + %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314 + %4 = load double* %arrayidx.4, align 32, !tbaa !0 + %add.4 = fadd double %4, 1.000000e+00 + %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314 + store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0 + %indvars.iv.next.415 = or i64 %indvars.iv, 5 + %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415 + %5 = load double* %arrayidx.5, align 8, !tbaa !0 + %add.5 = fadd double %5, 1.000000e+00 + %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415 + store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0 + %indvars.iv.next.516 = or i64 %indvars.iv, 6 + %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516 + %6 = load double* %arrayidx.6, align 16, !tbaa !0 + %add.6 = fadd double %6, 1.000000e+00 + %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516 + store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0 + %indvars.iv.next.617 = or i64 %indvars.iv, 7 + %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617 + %7 = load double* %arrayidx.7, align 8, !tbaa !0 + %add.7 = fadd double %7, 1.000000e+00 + %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617 + store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0 + %indvars.iv.next.718 = or i64 %indvars.iv, 8 + %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718 + %8 = load double* %arrayidx.8, align 32, !tbaa !0 + %add.8 = 
fadd double %8, 1.000000e+00 + %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718 + store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0 + %indvars.iv.next.819 = or i64 %indvars.iv, 9 + %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819 + %9 = load double* %arrayidx.9, align 8, !tbaa !0 + %add.9 = fadd double %9, 1.000000e+00 + %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819 + store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0 + %indvars.iv.next.920 = or i64 %indvars.iv, 10 + %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920 + %10 = load double* %arrayidx.10, align 16, !tbaa !0 + %add.10 = fadd double %10, 1.000000e+00 + %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920 + store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0 + %indvars.iv.next.1021 = or i64 %indvars.iv, 11 + %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021 + %11 = load double* %arrayidx.11, align 8, !tbaa !0 + %add.11 = fadd double %11, 1.000000e+00 + %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021 + store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0 + %indvars.iv.next.1122 = or i64 %indvars.iv, 12 + %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122 + %12 = load double* %arrayidx.12, align 32, !tbaa !0 + %add.12 = fadd double %12, 1.000000e+00 + %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122 + store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0 + %indvars.iv.next.1223 = or i64 %indvars.iv, 13 + %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223 + %13 = load double* %arrayidx.13, align 8, !tbaa !0 + %add.13 = fadd double 
%13, 1.000000e+00 + %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223 + store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0 + %indvars.iv.next.1324 = or i64 %indvars.iv, 14 + %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324 + %14 = load double* %arrayidx.14, align 16, !tbaa !0 + %add.14 = fadd double %14, 1.000000e+00 + %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324 + store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0 + %indvars.iv.next.1425 = or i64 %indvars.iv, 15 + %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425 + %15 = load double* %arrayidx.15, align 8, !tbaa !0 + %add.15 = fadd double %15, 1.000000e+00 + %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425 + store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0 + %indvars.iv.next.15 = add i64 %indvars.iv, 16 + %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32 + %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000 + br i1 %exitcond.15, label %for.end, label %for.body3 + +for.end: ; preds = %for.body3 + %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind + %inc7 = add nsw i32 %nl.010, 1 + %exitcond = icmp eq i32 %inc7, 400000 + br i1 %exitcond, label %for.end8, label 
%for.cond1.preheader + +for.end8: ; preds = %for.end + ret i32 0 + +; CHECK: @s000 +; CHECK: mtctr +; CHECK: bdnz +} + +declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double) + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll new file mode 100644 index 00000000000..eae8c38eee0 --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-sums.ll @@ -0,0 +1,134 @@ +; ModuleID = 'SingleSource/Regression/C/sumarray2d.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +@.str = private unnamed_addr constant [23 x i8] c"Sum(Array[%d,%d] = %d\0A\00", align 1 + +define i32 @SumArray([100 x i32]* nocapture %Array, i32 %NumI, i32 %NumJ) nounwind readonly { +entry: + %cmp12 = icmp eq i32 %NumI, 0 + br i1 %cmp12, label %for.end8, label %for.cond1.preheader.lr.ph + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp29 = icmp eq i32 %NumJ, 0 + br i1 %cmp29, label %for.inc6, label %for.body3.lr.ph.us + +for.inc6.us: ; preds = %for.body3.us + %indvars.iv.next17 = add i64 %indvars.iv16, 1 + %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32 + %exitcond19 = icmp eq i32 %lftr.wideiv18, %NumI + br i1 %exitcond19, label %for.end8, label %for.body3.lr.ph.us + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ] + %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ] + %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv + %0 = load i32* %arrayidx5.us, align 4, !tbaa !0 + %add.us = add nsw i32 %0, 
%Result.111.us + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %NumJ + br i1 %exitcond, label %for.inc6.us, label %for.body3.us + +for.body3.lr.ph.us: ; preds = %for.inc6.us, %for.cond1.preheader.lr.ph + %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ] + %Result.014.us = phi i32 [ %add.us, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ] + br label %for.body3.us + +for.inc6: ; preds = %for.inc6, %for.cond1.preheader.lr.ph + %i.013 = phi i32 [ %inc7, %for.inc6 ], [ 0, %for.cond1.preheader.lr.ph ] + %inc7 = add i32 %i.013, 1 + %exitcond20 = icmp eq i32 %inc7, %NumI + br i1 %exitcond20, label %for.end8, label %for.inc6 + +for.end8: ; preds = %for.inc6.us, %for.inc6, %entry + %Result.0.lcssa = phi i32 [ 0, %entry ], [ %add.us, %for.inc6.us ], [ 0, %for.inc6 ] + ret i32 %Result.0.lcssa +; CHECK: @SumArray +; CHECK: mtctr +; CHECK: bdnz +} + +define i32 @main() nounwind { +entry: + %Array = alloca [100 x [100 x i32]], align 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ] + %0 = trunc i64 %indvars.iv33 to i32 + %sub = sub i32 0, %0 + %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33 + store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0 + %indvars.iv.next34 = add i64 %indvars.iv33, 1 + %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32 + %exitcond36 = icmp eq i32 %lftr.wideiv35, 100 + br i1 %exitcond36, label %for.cond6.preheader, label %for.body + +for.cond6.preheader: ; preds = %for.body, %for.inc17 + %indvars.iv29 = phi i64 [ %indvars.iv.next30, %for.inc17 ], [ 0, %for.body ] + br label %for.body8 + +for.body8: ; preds = %for.inc14, %for.cond6.preheader + %indvars.iv = phi i64 [ 0, %for.cond6.preheader ], [ %indvars.iv.next, %for.inc14 ] + %1 = trunc i64 %indvars.iv to i32 + %2 = trunc i64 
%indvars.iv29 to i32 + %cmp9 = icmp eq i32 %1, %2 + br i1 %cmp9, label %for.inc14, label %if.then + +if.then: ; preds = %for.body8 + %3 = add i64 %indvars.iv, %indvars.iv29 + %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv + %4 = trunc i64 %3 to i32 + store i32 %4, i32* %arrayidx13, align 4, !tbaa !0 + br label %for.inc14 + +for.inc14: ; preds = %for.body8, %if.then + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv27 = trunc i64 %indvars.iv.next to i32 + %exitcond28 = icmp eq i32 %lftr.wideiv27, 100 + br i1 %exitcond28, label %for.inc17, label %for.body8 + +for.inc17: ; preds = %for.inc14 + %indvars.iv.next30 = add i64 %indvars.iv29, 1 + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 + %exitcond32 = icmp eq i32 %lftr.wideiv31, 100 + br i1 %exitcond32, label %for.body3.lr.ph.us.i, label %for.cond6.preheader + +for.inc6.us.i: ; preds = %for.body3.us.i + %indvars.iv.next17.i = add i64 %indvars.iv16.i, 1 + %lftr.wideiv24 = trunc i64 %indvars.iv.next17.i to i32 + %exitcond25 = icmp eq i32 %lftr.wideiv24, 100 + br i1 %exitcond25, label %SumArray.exit, label %for.body3.lr.ph.us.i + +for.body3.us.i: ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i + %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ] + %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ] + %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i + %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0 + %add.us.i = add nsw i32 %5, %Result.111.us.i + %indvars.iv.next.i = add i64 %indvars.iv.i, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 100 + br i1 %exitcond, label %for.inc6.us.i, label %for.body3.us.i + +for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc6.us.i + %indvars.iv16.i = phi i64 [ %indvars.iv.next17.i, %for.inc6.us.i ], [ 0, 
%for.inc17 ] + %Result.014.us.i = phi i32 [ %add.us.i, %for.inc6.us.i ], [ 0, %for.inc17 ] + br label %for.body3.us.i + +SumArray.exit: ; preds = %for.inc6.us.i + %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind + ret i32 0 + +; CHECK: @main +; CHECK: mtctr +; CHECK: bdnz +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"}