mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-07-25 13:24:46 +00:00
X86 Peephole: fold loads to the source register operand if possible.
Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel.

rdar://10554090 and rdar://11873276

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160919 91177308-0d34-0410-b5e6-96231b3b80d8
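In effect, once other optimizations have removed the extra uses of a loaded value, the load's sole remaining use in the same basic block can absorb the load as a memory operand. A schematic of the transformation (AT&T syntax; the registers are illustrative only, echoing the @test3 regression test added below, not actual codegen output):

    # Before the peephole: the value loaded from (%eax) has exactly one
    # use, and the def and the use sit in the same basic block.
    movl (%eax), %ecx
    xorl %ecx, %edx
    # After optimizeLoadInstr + foldMemoryOperand: the use is rewritten
    # to its memory-operand form and the standalone load is erased.
    xorl (%eax), %edx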
@@ -14,6 +14,7 @@
 #ifndef LLVM_TARGET_TARGETINSTRINFO_H
 #define LLVM_TARGET_TARGETINSTRINFO_H
 
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -693,6 +694,16 @@ public:
     return false;
   }
 
+  /// optimizeLoadInstr - Try to remove the load by folding it to a register
+  /// operand at the use. We fold the load instructions if and only if the
+  /// def and use are in the same BB.
+  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+                                          const MachineRegisterInfo *MRI,
+                                          SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
+                                          MachineInstr *&DefMI) const {
+    return 0;
+  }
+
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -78,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused");
 STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
 STATISTIC(NumCmps, "Number of compares eliminated");
 STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -441,6 +442,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   SmallPtrSet<MachineInstr*, 8> LocalMIs;
   SmallSet<unsigned, 4> ImmDefRegs;
   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+  SmallSet<unsigned, 4> FoldAsLoadDefRegs;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
 
@@ -448,6 +450,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     LocalMIs.clear();
     ImmDefRegs.clear();
     ImmDefMIs.clear();
+    FoldAsLoadDefRegs.clear();
 
     bool First = true;
     MachineBasicBlock::iterator PMII;
@@ -489,6 +492,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
 
+      MachineInstr *DefMI = 0;
+      MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs,
+                                                    DefMI);
+      if (FoldMI) {
+        // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+        LocalMIs.erase(MI);
+        LocalMIs.erase(DefMI);
+        LocalMIs.insert(FoldMI);
+        MI->eraseFromParent();
+        DefMI->eraseFromParent();
+        ++NumLoadFold;
+
+        // MI is replaced with FoldMI.
+        Changed = true;
+        PMII = FoldMI;
+        MII = llvm::next(PMII);
+        continue;
+      }
+
       First = false;
       PMII = MII;
       ++MII;
@@ -3323,6 +3323,75 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   return true;
 }
 
+/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// operand at the use. We fold the load instructions if and only if the
+/// def and use are in the same BB.
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+                  SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
+                  MachineInstr *&DefMI) const {
+  if (MI->mayStore() || MI->isCall())
+    // To be conservative, we don't fold the loads if there is a store in
+    // between.
+    FoldAsLoadDefRegs.clear();
+  // We only fold loads to a virtual register.
+  if (MI->canFoldAsLoad()) {
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.getNumDefs() == 1) {
+      unsigned Reg = MI->getOperand(0).getReg();
+      // To reduce compilation time, we check MRI->hasOneUse when inserting
+      // loads. It should be checked when processing uses of the load, since
+      // uses can be removed during peephole.
+      if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneUse(Reg)) {
+        FoldAsLoadDefRegs.insert(Reg);
+        return 0;
+      }
+    }
+  }
+
+  // Collect information about virtual register operands of MI.
+  DenseMap<unsigned, unsigned> SrcVirtualRegToOp;
+  SmallSet<unsigned, 4> DstVirtualRegs;
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (MO.isDef())
+      DstVirtualRegs.insert(Reg);
+    else if (FoldAsLoadDefRegs.count(Reg)) {
+      // Only handle the case where Reg is used in a single src operand.
+      if (SrcVirtualRegToOp.find(Reg) != SrcVirtualRegToOp.end())
+        SrcVirtualRegToOp.erase(Reg);
+      else
+        SrcVirtualRegToOp.insert(std::make_pair(Reg, i));
+    }
+  }
+
+  for (DenseMap<unsigned, unsigned>::iterator SI = SrcVirtualRegToOp.begin(),
+       SE = SrcVirtualRegToOp.end(); SI != SE; SI++) {
+    // If the virtual register is updated by MI, we can't fold the load.
+    if (DstVirtualRegs.count(SI->first)) continue;
+
+    // Check whether we can fold the def into this operand.
+    DefMI = MRI->getVRegDef(SI->first);
+    assert(DefMI);
+    bool SawStore = false;
+    if (!DefMI->isSafeToMove(this, 0, SawStore))
+      continue;
+
+    SmallVector<unsigned, 8> Ops;
+    Ops.push_back(SI->second);
+    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+    if (!FoldMI) continue;
+    FoldAsLoadDefRegs.erase(SI->first);
+    return FoldMI;
+  }
+  return 0;
+}
+
 /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
 /// instruction with two undef reads of the register being defined. This is
 /// used for mapping:
@@ -387,6 +387,14 @@ public:
                                     unsigned SrcReg2, int CmpMask, int CmpValue,
                                     const MachineRegisterInfo *MRI) const;
 
+  /// optimizeLoadInstr - Try to remove the load by folding it to a register
+  /// operand at the use. We fold the load instructions if and only if the
+  /// def and use are in the same BB.
+  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+                                          const MachineRegisterInfo *MRI,
+                                          SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
+                                          MachineInstr *&DefMI) const;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
@@ -3,8 +3,7 @@
 define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
 entry:
 ; CHECK: vmovaps
-; CHECK: vmovaps
-; CHECK: vinsertf128
+; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
 ; CHECK: vmovups
   %A = load <4 x i32>* %Ap
   %B = load <4 x i32>* %Bp
@@ -34,8 +34,7 @@ entry:
 define double @squirt(double* %x) nounwind {
 entry:
 ; CHECK: squirt:
-; CHECK: movsd ([[A0]]), %xmm0
-; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK: sqrtsd ([[A0]]), %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
@@ -45,3 +45,29 @@ L:
 
 }
 
+; rdar://10554090
+; xor in exit block will be CSE'ed and load will be folded to xor in entry.
+define i1 @test3(i32* %P, i32* %Q) nounwind {
+; CHECK: test3:
+; CHECK: movl 8(%esp), %eax
+; CHECK: xorl (%eax),
+; CHECK: j
+; CHECK-NOT: xor
+entry:
+  %0 = load i32* %P, align 4
+  %1 = load i32* %Q, align 4
+  %2 = xor i32 %0, %1
+  %3 = and i32 %2, 65535
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %exit, label %land.end
+
+exit:
+  %shr.i.i19 = xor i32 %1, %0
+  %5 = and i32 %shr.i.i19, 2147418112
+  %6 = icmp eq i32 %5, 0
+  br label %land.end
+
+land.end:
+  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
+  ret i1 %7
+}
@@ -1,11 +1,14 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pcmpeqd %t | count 1
-; RUN: grep xor %t | count 1
-; RUN: not grep LCP %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define <2 x double> @foo() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
+; CHECK: foo:
+; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK-NEXT: ret
 }
 define <2 x double> @bar() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
+; CHECK: bar:
+; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK-NEXT: ret
 }
@@ -137,16 +137,13 @@ define double @ole_inverse(double %x, double %y) nounwind {
 }
 
 ; CHECK: ogt_x:
-; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ogt_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ogt_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @ogt_x(double %x) nounwind {
   %c = fcmp ogt double %x, 0.000000e+00
@@ -155,16 +152,13 @@ define double @ogt_x(double %x) nounwind {
 }
 
 ; CHECK: olt_x:
-; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: olt_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
+; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: olt_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @olt_x(double %x) nounwind {
   %c = fcmp olt double %x, 0.000000e+00
@@ -217,12 +211,10 @@ define double @olt_inverse_x(double %x) nounwind {
 ; CHECK: oge_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: oge_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: oge_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @oge_x(double %x) nounwind {
   %c = fcmp oge double %x, 0.000000e+00
@@ -233,12 +225,10 @@ define double @oge_x(double %x) nounwind {
 ; CHECK: ole_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ole_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
+; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ole_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @ole_x(double %x) nounwind {
   %c = fcmp ole double %x, 0.000000e+00
@@ -411,12 +401,10 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK: ugt_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ugt_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ugt_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @ugt_x(double %x) nounwind {
   %c = fcmp ugt double %x, 0.000000e+00
@@ -427,12 +415,10 @@ define double @ugt_x(double %x) nounwind {
 ; CHECK: ult_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: ult_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
+; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ult_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @ult_x(double %x) nounwind {
   %c = fcmp ult double %x, 0.000000e+00
@@ -482,12 +468,10 @@ define double @ult_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: uge_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @uge_x(double %x) nounwind {
   %c = fcmp uge double %x, 0.000000e+00
@@ -501,12 +485,10 @@ define double @uge_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_x:
-; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
+; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ule_x:
-; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; FINITE-NEXT: ret
 define double @ule_x(double %x) nounwind {
   %c = fcmp ule double %x, 0.000000e+00
@@ -515,8 +497,7 @@ define double @ule_x(double %x) nounwind {
 }
 
 ; CHECK: uge_inverse_x:
-; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -535,8 +516,7 @@ define double @uge_inverse_x(double %x) nounwind {
 }
 
 ; CHECK: ule_inverse_x:
-; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -14,8 +14,8 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test2:
 ; CHECK: pcmp
-; CHECK: pcmp
-; CHECK: pxor
+; CHECK: pxor LCP
+; CHECK: movdqa
 ; CHECK: ret
   %C = icmp sge <4 x i32> %A, %B
   %D = sext <4 x i1> %C to <4 x i32>