diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index d64df445b05..1d42c323b9b 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -405,9 +405,12 @@ public: /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new /// instruction after load / store are unfolded from an instruction of the /// specified opcode. It returns zero if the specified unfolding is not - /// possible. + /// possible. If LoadRegIndex is non-null, it is filled in with the operand + /// index of the operand which will hold the register holding the loaded + /// value. virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore) const { + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = 0) const { return 0; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 5ab0ff745d1..7b64047c39c 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -392,14 +392,16 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (!AA->pointsToConstantMemory(MMO->getValue())) return 0; } // Next determine the register class for a temporary register. + unsigned LoadRegIndex; unsigned NewOpc = TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), /*UnfoldLoad=*/true, - /*UnfoldStore=*/false); + /*UnfoldStore=*/false, + &LoadRegIndex); if (NewOpc == 0) return 0; const TargetInstrDesc &TID = TII->get(NewOpc); if (TID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(TRI); + const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = RegInfo->createVirtualRegister(RC); SmallVector NewMIs; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e8a39d11040..87bc10d9fe0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2621,7 +2621,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, } unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore) const { + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex) const { DenseMap >::iterator I = MemOp2RegOpTable.find((unsigned*)Opc); if (I == MemOp2RegOpTable.end()) @@ -2632,6 +2633,8 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, return 0; if (UnfoldStore && !FoldedStore) return 0; + if (LoadRegIndex) + *LoadRegIndex = I->second.second & 0xf; return I->second.first; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 2237c8be517..6eb07d55eb3 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -560,9 +560,12 @@ public: /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new /// instruction after load / store are unfolded from an instruction of the /// specified opcode. It returns zero if the specified unfolding is not - /// possible. + /// possible. If LoadRegIndex is non-null, it is filled in with the operand + /// index of the operand which will hold the register holding the loaded + /// value. virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore) const; + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = 0) const; virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 1da30ad26f5..64d449f678f 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -120,3 +120,29 @@ return: ; preds = %bb60 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + +; CodeGen should use the correct register class when extracting +; a load from a zero-extending load for hoisting. + +; CHECK: default_get_pch_validity: +; CHECK: movl cl_options_count(%rip), %ecx + +@cl_options_count = external constant i32 ; [#uses=2] + +define void @default_get_pch_validity() nounwind { +entry: + %tmp4 = load i32* @cl_options_count, align 4 ; [#uses=1] + %tmp5 = icmp eq i32 %tmp4, 0 ; [#uses=1] + br i1 %tmp5, label %bb6, label %bb2 + +bb2: ; preds = %bb2, %entry + %i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; [#uses=1] + %tmp25 = add i64 %i.019, 1 ; [#uses=2] + %tmp11 = load i32* @cl_options_count, align 4 ; [#uses=1] + %tmp12 = zext i32 %tmp11 to i64 ; [#uses=1] + %tmp13 = icmp ugt i64 %tmp12, %tmp25 ; [#uses=1] + br i1 %tmp13, label %bb2, label %bb6 + +bb6: ; preds = %bb2, %entry + ret void +}