diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index ba5e05dd101..73efc507fab 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_TARGET_TARGETINSTRINFO_H
 #define LLVM_TARGET_TARGETINSTRINFO_H
 
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -694,16 +693,6 @@ public:
     return false;
   }
 
-  /// optimizeLoadInstr - Try to remove the load by folding it to a register
-  /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
-  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
-                                          const MachineRegisterInfo *MRI,
-                                          SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
-                                          MachineInstr *&DefMI) const {
-    return 0;
-  }
-
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 0a4632a9049..91c33c4af41 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,7 +78,6 @@ STATISTIC(NumReuse, "Number of extension results reused");
 STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
 STATISTIC(NumCmps, "Number of compares eliminated");
 STATISTIC(NumImmFold, "Number of move immediate folded");
-STATISTIC(NumLoadFold, "Number of loads folded");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -442,7 +441,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   SmallPtrSet<MachineInstr*, 8> LocalMIs;
   SmallSet<unsigned, 4> ImmDefRegs;
   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
-  SmallSet<unsigned, 16> FoldAsLoadDefRegs;
 
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
@@ -450,7 +448,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     LocalMIs.clear();
     ImmDefRegs.clear();
     ImmDefMIs.clear();
-    FoldAsLoadDefRegs.clear();
 
     bool First = true;
     MachineBasicBlock::iterator PMII;
@@ -492,25 +489,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
 
-      MachineInstr *DefMI = 0;
-      MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs,
-                                                    DefMI);
-      if (FoldMI) {
-        // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
-        LocalMIs.erase(MI);
-        LocalMIs.erase(DefMI);
-        LocalMIs.insert(FoldMI);
-        MI->eraseFromParent();
-        DefMI->eraseFromParent();
-        ++NumLoadFold;
-
-        // MI is replaced with FoldMI.
-        Changed = true;
-        PMII = FoldMI;
-        MII = llvm::next(PMII);
-        continue;
-      }
-
       First = false;
       PMII = MII;
       ++MII;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2bae2c6e452..89a57b53f3c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3323,75 +3323,6 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   return true;
 }
 
-/// optimizeLoadInstr - Try to remove the load by folding it to a register
-/// operand at the use. We fold the load instructions if and only if the
-/// def and use are in the same BB.
-MachineInstr* X86InstrInfo::
-optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
-                  SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
-                  MachineInstr *&DefMI) const {
-  if (MI->mayStore() || MI->isCall())
-    // To be conservative, we don't fold the loads if there is a store in
-    // between.
-    FoldAsLoadDefRegs.clear();
-  // We only fold loads to a virtual register.
-  if (MI->canFoldAsLoad()) {
-    const MCInstrDesc &MCID = MI->getDesc();
-    if (MCID.getNumDefs() == 1) {
-      unsigned Reg = MI->getOperand(0).getReg();
-      // To reduce compilation time, we check MRI->hasOneUse when inserting
-      // loads. It should be checked when processing uses of the load, since
-      // uses can be removed during peephole.
-      if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneUse(Reg)) {
-        FoldAsLoadDefRegs.insert(Reg);
-        return 0;
-      }
-    }
-  }
-
-  // Collect information about virtual register operands of MI.
-  DenseMap<unsigned, unsigned> SrcVirtualRegToOp;
-  SmallSet<unsigned, 2> DstVirtualRegs;
-  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(Reg))
-      continue;
-    if (MO.isDef())
-      DstVirtualRegs.insert(Reg);
-    else if (FoldAsLoadDefRegs.count(Reg)) {
-      // Only handle the case where Reg is used in a single src operand.
-      if (SrcVirtualRegToOp.find(Reg) != SrcVirtualRegToOp.end())
-        SrcVirtualRegToOp.erase(Reg);
-      else
-        SrcVirtualRegToOp.insert(std::make_pair(Reg, i));
-    }
-  }
-
-  for (DenseMap<unsigned, unsigned>::iterator SI = SrcVirtualRegToOp.begin(),
-       SE = SrcVirtualRegToOp.end(); SI != SE; SI++) {
-    // If the virtual register is updated by MI, we can't fold the load.
-    if (DstVirtualRegs.count(SI->first)) continue;
-
-    // Check whether we can fold the def into this operand.
-    DefMI = MRI->getVRegDef(SI->first);
-    assert(DefMI);
-    bool SawStore = false;
-    if (!DefMI->isSafeToMove(this, 0, SawStore))
-      continue;
-
-    SmallVector<unsigned, 2> Ops;
-    Ops.push_back(SI->second);
-    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
-    if (!FoldMI) continue;
-    FoldAsLoadDefRegs.erase(SI->first);
-    return FoldMI;
-  }
-  return 0;
-}
-
 /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
 /// instruction with two undef reads of the register being defined. This is
 /// used for mapping:
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 9205315653e..ec9b2e619d9 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -387,14 +387,6 @@ public:
                                     unsigned SrcReg2, int CmpMask, int CmpValue,
                                     const MachineRegisterInfo *MRI) const;
 
-  /// optimizeLoadInstr - Try to remove the load by folding it to a register
-  /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
-  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
-                                          const MachineRegisterInfo *MRI,
-                                          SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
-                                          MachineInstr *&DefMI) const;
-
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll
index 1c1e8e2f0a2..61fef90139d 100644
--- a/test/CodeGen/X86/2012-05-19-avx2-store.ll
+++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll
@@ -3,7 +3,8 @@
 define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
 entry:
   ; CHECK: vmovaps
-  ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
+  ; CHECK: vmovaps
+  ; CHECK: vinsertf128
   ; CHECK: vmovups
   %A = load <4 x i32>* %Ap
   %B = load <4 x i32>* %Bp
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 4d801891da5..3e658671436 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -34,7 +34,8 @@ entry:
 define double @squirt(double* %x) nounwind {
 entry:
 ; CHECK: squirt:
-; CHECK: sqrtsd ([[A0]]), %xmm0
+; CHECK: movsd ([[A0]]), %xmm0
+; CHECK: sqrtsd %xmm0, %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index c961f7576f9..e03cb7edb58 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -45,29 +45,3 @@ L:
 }
 
 
-; rdar://10554090
-; xor in exit block will be CSE'ed and load will be folded to xor in entry.
-define i1 @test3(i32* %P, i32* %Q) nounwind {
-; CHECK: test3:
-; CHECK: movl 8(%esp), %eax
-; CHECK: xorl (%eax),
-; CHECK: j
-; CHECK-NOT: xor
-entry:
-  %0 = load i32* %P, align 4
-  %1 = load i32* %Q, align 4
-  %2 = xor i32 %0, %1
-  %3 = and i32 %2, 65535
-  %4 = icmp eq i32 %3, 0
-  br i1 %4, label %exit, label %land.end
-
-exit:
-  %shr.i.i19 = xor i32 %1, %0
-  %5 = and i32 %shr.i.i19, 2147418112
-  %6 = icmp eq i32 %5, 0
-  br label %land.end
-
-land.end:
-  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
-  ret i1 %7
-}
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index a35dccddbab..cc4198d7caf 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,14 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep pcmpeqd %t | count 1
+; RUN: grep xor %t | count 1
+; RUN: not grep LCP %t
 
 define <2 x double> @foo() nounwind {
   ret <2 x double> bitcast (<2 x i64> <i64 -1, i64 -1> to <2 x double>)
-; CHECK: foo:
-; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; CHECK-NEXT: ret
 }
 define <2 x double> @bar() nounwind {
   ret <2 x double> bitcast (<2 x i64> <i64 0, i64 0> to <2 x double>)
-; CHECK: bar:
-; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; CHECK-NEXT: ret
 }
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index f299bd101cf..4405f684512 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
 
 ; Some of these patterns can be matched as SSE min or max. Some of
 ; then can be matched provided that the operands are swapped.
@@ -137,13 +137,16 @@ define double @ole_inverse(double %x, double %y) nounwind {
 }
 
 ; CHECK: ogt_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ogt_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ogt_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ogt_x(double %x) nounwind {
   %c = fcmp ogt double %x, 0.000000e+00
@@ -152,13 +155,16 @@ define double @ogt_x(double %x) nounwind {
 }
 
 ; CHECK: olt_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: olt_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: olt_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @olt_x(double %x) nounwind {
   %c = fcmp olt double %x, 0.000000e+00
@@ -211,10 +217,12 @@ define double @olt_inverse_x(double %x) nounwind {
 ; CHECK: oge_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: oge_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: oge_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @oge_x(double %x) nounwind {
   %c = fcmp oge double %x, 0.000000e+00
@@ -225,10 +233,12 @@ define double @oge_x(double %x) nounwind {
 ; CHECK: ole_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ole_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ole_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ole_x(double %x) nounwind {
   %c = fcmp ole double %x, 0.000000e+00
@@ -401,10 +411,12 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK: ugt_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ugt_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ugt_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ugt_x(double %x) nounwind {
   %c = fcmp ugt double %x, 0.000000e+00
@@ -415,10 +427,12 @@ define double @ugt_x(double %x) nounwind {
 ; CHECK: ult_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: ult_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ult_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ult_x(double %x) nounwind {
   %c = fcmp ult double %x, 0.000000e+00
@@ -468,10 +482,12 @@ define double @ult_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: uge_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @uge_x(double %x) nounwind {
   %c = fcmp uge double %x, 0.000000e+00
@@ -485,10 +501,12 @@ define double @uge_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ule_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ule_x(double %x) nounwind {
   %c = fcmp ule double %x, 0.000000e+00
@@ -497,7 +515,8 @@ define double @ule_x(double %x) nounwind {
 }
 
 ; CHECK: uge_inverse_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -516,7 +535,8 @@ define double @uge_inverse_x(double %x) nounwind {
 
 
 ; CHECK: ule_inverse_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 1e04f19ee89..39c9b770d5f 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
 
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
@@ -14,8 +14,8 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test2:
 ; CHECK: pcmp
-; CHECK: pxor LCP
-; CHECK: movdqa
+; CHECK: pcmp
+; CHECK: pxor
 ; CHECK: ret
   %C = icmp sge <4 x i32> %A, %B
   %D = sext <4 x i1> %C to <4 x i32>