mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
[X86] Special-case 2x CMOV when custom-inserting.
This lets us avoid a few copies that are otherwise hard to get rid of. The way this is done is, the custom-inserter looks at the following instruction for another CMOV, and replaces both at the same time. A previous version used a new CMOV2 opcode, but the custom inserter is expected to be able to return a different basic block anyway, which means it's OK - though far from ideal - to alter that block's contents. Explicitly document that, in case it ever makes a difference. Alternatives welcome! Follow-up to r231045. rdar://19767934 Closes http://reviews.llvm.org/D8019 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231046 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8b5527deef
commit
14593eb417
@ -2664,6 +2664,8 @@ public:
|
||||
/// is created but not inserted into any basic blocks, and this method is
|
||||
/// called to expand it into a sequence of instructions, potentially also
|
||||
/// creating new basic blocks and control flow.
|
||||
/// As long as the returned basic block is different (i.e., we created a new
|
||||
/// one), the custom inserter is free to modify the rest of \p MBB.
|
||||
virtual MachineBasicBlock *
|
||||
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
|
||||
|
||||
|
@ -18108,6 +18108,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
|
||||
// fallthrough --> copy0MBB
|
||||
MachineBasicBlock *thisMBB = BB;
|
||||
MachineFunction *F = BB->getParent();
|
||||
|
||||
// We also lower double CMOVs:
|
||||
// (CMOV (CMOV F, T, cc1), T, cc2)
|
||||
// to two successive branches. For that, we look for another CMOV as the
|
||||
// following instruction.
|
||||
//
|
||||
// Without this, we would add a PHI between the two jumps, which ends up
|
||||
// creating a few copies all around. For instance, for
|
||||
//
|
||||
// (sitofp (zext (fcmp une)))
|
||||
//
|
||||
// we would generate:
|
||||
//
|
||||
// ucomiss %xmm1, %xmm0
|
||||
// movss <1.0f>, %xmm0
|
||||
// movaps %xmm0, %xmm1
|
||||
// jne .LBB5_2
|
||||
// xorps %xmm1, %xmm1
|
||||
// .LBB5_2:
|
||||
// jp .LBB5_4
|
||||
// movaps %xmm1, %xmm0
|
||||
// .LBB5_4:
|
||||
// retq
|
||||
//
|
||||
// because this custom-inserter would have generated:
|
||||
//
|
||||
// A
|
||||
// | \
|
||||
// | B
|
||||
// | /
|
||||
// C
|
||||
// | \
|
||||
// | D
|
||||
// | /
|
||||
// E
|
||||
//
|
||||
// A: X = ...; Y = ...
|
||||
// B: empty
|
||||
// C: Z = PHI [X, A], [Y, B]
|
||||
// D: empty
|
||||
// E: PHI [X, C], [Z, D]
|
||||
//
|
||||
// If we lower both CMOVs in a single step, we can instead generate:
|
||||
//
|
||||
// A
|
||||
// | \
|
||||
// | C
|
||||
// | /|
|
||||
// |/ |
|
||||
// | |
|
||||
// | D
|
||||
// | /
|
||||
// E
|
||||
//
|
||||
// A: X = ...; Y = ...
|
||||
// D: empty
|
||||
// E: PHI [X, A], [X, C], [Y, D]
|
||||
//
|
||||
// Which, in our sitofp/fcmp example, gives us something like:
|
||||
//
|
||||
// ucomiss %xmm1, %xmm0
|
||||
// movss <1.0f>, %xmm0
|
||||
// jne .LBB5_4
|
||||
// jp .LBB5_4
|
||||
// xorps %xmm0, %xmm0
|
||||
// .LBB5_4:
|
||||
// retq
|
||||
//
|
||||
MachineInstr *NextCMOV = nullptr;
|
||||
MachineBasicBlock::iterator NextMIIt =
|
||||
std::next(MachineBasicBlock::iterator(MI));
|
||||
if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
|
||||
NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
|
||||
NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
|
||||
NextCMOV = &*NextMIIt;
|
||||
|
||||
MachineBasicBlock *jcc1MBB = nullptr;
|
||||
|
||||
// If we have a double CMOV, we lower it to two successive branches to
|
||||
// the same block. EFLAGS is used by both, so mark it as live in the second.
|
||||
if (NextCMOV) {
|
||||
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
F->insert(It, jcc1MBB);
|
||||
jcc1MBB->addLiveIn(X86::EFLAGS);
|
||||
}
|
||||
|
||||
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
F->insert(It, copy0MBB);
|
||||
@ -18116,8 +18202,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
|
||||
// If the EFLAGS register isn't dead in the terminator, then claim that it's
|
||||
// live into the sink and copy blocks.
|
||||
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||
if (!MI->killsRegister(X86::EFLAGS) &&
|
||||
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
|
||||
|
||||
MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
|
||||
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
|
||||
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
|
||||
copy0MBB->addLiveIn(X86::EFLAGS);
|
||||
sinkMBB->addLiveIn(X86::EFLAGS);
|
||||
}
|
||||
@ -18128,7 +18216,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
|
||||
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
|
||||
|
||||
// Add the true and fallthrough blocks as its successors.
|
||||
BB->addSuccessor(copy0MBB);
|
||||
if (NextCMOV) {
|
||||
// The fallthrough block may be jcc1MBB, if we have a double CMOV.
|
||||
BB->addSuccessor(jcc1MBB);
|
||||
|
||||
// In that case, jcc1MBB will itself fall through to copy0MBB, and
|
||||
// jump to the sinkMBB.
|
||||
jcc1MBB->addSuccessor(copy0MBB);
|
||||
jcc1MBB->addSuccessor(sinkMBB);
|
||||
} else {
|
||||
BB->addSuccessor(copy0MBB);
|
||||
}
|
||||
|
||||
// The true block target of the first (or only) branch is always sinkMBB.
|
||||
BB->addSuccessor(sinkMBB);
|
||||
|
||||
// Create the conditional branch instruction.
|
||||
@ -18136,6 +18236,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
|
||||
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
|
||||
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
|
||||
|
||||
if (NextCMOV) {
|
||||
unsigned Opc2 = X86::GetCondBranchFromCond(
|
||||
(X86::CondCode)NextCMOV->getOperand(3).getImm());
|
||||
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
|
||||
}
|
||||
|
||||
// copy0MBB:
|
||||
// %FalseValue = ...
|
||||
// # fallthrough to sinkMBB
|
||||
@ -18144,10 +18250,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
|
||||
// sinkMBB:
|
||||
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
|
||||
// ...
|
||||
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
|
||||
TII->get(X86::PHI), MI->getOperand(0).getReg())
|
||||
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
|
||||
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
|
||||
MI->getOperand(0).getReg())
|
||||
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
|
||||
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
|
||||
|
||||
// If we have a double CMOV, the second Jcc provides the same incoming
|
||||
// value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
|
||||
if (NextCMOV) {
|
||||
MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
|
||||
// Copy the PHI result to the register defined by the second CMOV.
|
||||
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
|
||||
DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
|
||||
.addReg(MI->getOperand(0).getReg());
|
||||
NextCMOV->eraseFromParent();
|
||||
}
|
||||
|
||||
MI->eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return sinkMBB;
|
||||
|
@ -15,21 +15,18 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
; CMOV-NEXT: movl %edi, %eax
|
||||
; CMOV-NEXT: retq
|
||||
|
||||
; NOCMOV-NEXT: flds 8(%esp)
|
||||
; NOCMOV-NEXT: flds 4(%esp)
|
||||
; NOCMOV-NEXT: fucompp
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 16(%esp), %eax
|
||||
; NOCMOV-NEXT: movl %eax, %ecx
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %ecx, %eax
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: movl (%eax), %eax
|
||||
; NOCMOV-NEXT: retl
|
||||
; NOCMOV-NEXT: flds 8(%esp)
|
||||
; NOCMOV-NEXT: flds 4(%esp)
|
||||
; NOCMOV-NEXT: fucompp
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 16(%esp), %eax
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %eax
|
||||
; NOCMOV-NEXT:[[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%eax), %eax
|
||||
; NOCMOV-NEXT: retl
|
||||
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
|
||||
entry:
|
||||
%cmp = fcmp oeq float %a, %b
|
||||
@ -51,13 +48,10 @@ entry:
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 20(%esp), %ecx
|
||||
; NOCMOV-NEXT: movl %ecx, %eax
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %eax
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %eax, %ecx
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%ecx), %eax
|
||||
; NOCMOV-NEXT: orl $4, %ecx
|
||||
; NOCMOV-NEXT: movl (%ecx), %edx
|
||||
@ -83,13 +77,10 @@ entry:
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 12(%esp), %ecx
|
||||
; NOCMOV-NEXT: movl %ecx, %eax
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 20(%esp), %eax
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %eax, %ecx
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 20(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%ecx), %eax
|
||||
; NOCMOV-NEXT: orl $4, %ecx
|
||||
; NOCMOV-NEXT: movl (%ecx), %edx
|
||||
@ -104,13 +95,10 @@ entry:
|
||||
; CHECK-LABEL: test_select_fcmp_oeq_f64:
|
||||
|
||||
; CMOV-NEXT: ucomiss %xmm1, %xmm0
|
||||
; CMOV-NEXT: movaps %xmm3, %xmm0
|
||||
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm2, %xmm0
|
||||
; CMOV-NEXT: [[TBB1]]:
|
||||
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm0, %xmm3
|
||||
; CMOV-NEXT: [[TBB2]]:
|
||||
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: jp [[TBB]]
|
||||
; CMOV-NEXT: movaps %xmm2, %xmm3
|
||||
; CMOV-NEXT: [[TBB]]:
|
||||
; CMOV-NEXT: movaps %xmm3, %xmm0
|
||||
; CMOV-NEXT: retq
|
||||
|
||||
@ -120,13 +108,10 @@ entry:
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 20(%esp), %eax
|
||||
; NOCMOV-NEXT: movl %eax, %ecx
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %ecx, %eax
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 12(%esp), %eax
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: fldl (%eax)
|
||||
; NOCMOV-NEXT: retl
|
||||
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
|
||||
@ -139,68 +124,51 @@ entry:
|
||||
; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
|
||||
|
||||
; CMOV-NEXT: ucomiss %xmm1, %xmm0
|
||||
; CMOV-NEXT: movaps %xmm3, %xmm0
|
||||
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm2, %xmm0
|
||||
; CMOV-NEXT: [[TBB1]]:
|
||||
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm0, %xmm3
|
||||
; CMOV-NEXT: [[TBB2]]:
|
||||
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: jp [[TBB]]
|
||||
; CMOV-NEXT: movaps %xmm2, %xmm3
|
||||
; CMOV-NEXT: [[TBB]]:
|
||||
; CMOV-NEXT: movaps %xmm3, %xmm0
|
||||
; CMOV-NEXT: retq
|
||||
|
||||
; NOCMOV-NEXT: pushl %ebx
|
||||
; NOCMOV-NEXT: pushl %edi
|
||||
; NOCMOV-NEXT: pushl %esi
|
||||
; NOCMOV-NEXT: flds 24(%esp)
|
||||
; NOCMOV-NEXT: flds 20(%esp)
|
||||
; NOCMOV-NEXT: flds 16(%esp)
|
||||
; NOCMOV-NEXT: fucompp
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal 44(%esp), %eax
|
||||
; NOCMOV-NEXT: movl %eax, %ecx
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 28(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %ecx, %eax
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: leal 40(%esp), %eax
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 24(%esp), %eax
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%eax), %eax
|
||||
; NOCMOV-NEXT: leal 48(%esp), %ecx
|
||||
; NOCMOV-NEXT: movl %ecx, %edx
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 32(%esp), %edx
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %edx, %ecx
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: leal 44(%esp), %ecx
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 28(%esp), %ecx
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%ecx), %ecx
|
||||
; NOCMOV-NEXT: leal 52(%esp), %edx
|
||||
; NOCMOV-NEXT: movl %edx, %esi
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 36(%esp), %esi
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %esi, %edx
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: movl (%edx), %edx
|
||||
; NOCMOV-NEXT: leal 56(%esp), %esi
|
||||
; NOCMOV-NEXT: movl %esi, %ebx
|
||||
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: leal 40(%esp), %ebx
|
||||
; NOCMOV-NEXT: [[TBB1]]:
|
||||
; NOCMOV-NEXT: movl 16(%esp), %edi
|
||||
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: movl %ebx, %esi
|
||||
; NOCMOV-NEXT: [[TBB2]]:
|
||||
; NOCMOV-NEXT: leal 48(%esp), %esi
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 32(%esp), %esi
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl 12(%esp), %edx
|
||||
; NOCMOV-NEXT: movl (%esi), %esi
|
||||
; NOCMOV-NEXT: movl %esi, 12(%edi)
|
||||
; NOCMOV-NEXT: movl %edx, 8(%edi)
|
||||
; NOCMOV-NEXT: movl %ecx, 4(%edi)
|
||||
; NOCMOV-NEXT: movl %eax, (%edi)
|
||||
; NOCMOV-NEXT: leal 52(%esp), %edi
|
||||
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; NOCMOV-NEXT: jp [[TBB]]
|
||||
; NOCMOV-NEXT: leal 36(%esp), %edi
|
||||
; NOCMOV-NEXT: [[TBB]]:
|
||||
; NOCMOV-NEXT: movl (%edi), %edi
|
||||
; NOCMOV-NEXT: movl %edi, 12(%edx)
|
||||
; NOCMOV-NEXT: movl %esi, 8(%edx)
|
||||
; NOCMOV-NEXT: movl %ecx, 4(%edx)
|
||||
; NOCMOV-NEXT: movl %eax, (%edx)
|
||||
; NOCMOV-NEXT: popl %esi
|
||||
; NOCMOV-NEXT: popl %edi
|
||||
; NOCMOV-NEXT: popl %ebx
|
||||
; NOCMOV-NEXT: retl $4
|
||||
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
|
||||
entry:
|
||||
@ -217,17 +185,14 @@ entry:
|
||||
; CHECK-LABEL: test_zext_fcmp_une:
|
||||
; CMOV-NEXT: ucomiss %xmm1, %xmm0
|
||||
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
|
||||
; CMOV-NEXT: movaps %xmm0, %xmm1
|
||||
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: xorps %xmm1, %xmm1
|
||||
; CMOV-NEXT: [[TBB1]]:
|
||||
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm1, %xmm0
|
||||
; CMOV-NEXT: [[TBB2]]:
|
||||
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: jp [[TBB]]
|
||||
; CMOV-NEXT: xorps %xmm0, %xmm0
|
||||
; CMOV-NEXT: [[TBB]]:
|
||||
; CMOV-NEXT: retq
|
||||
|
||||
; NOCMOV: jne
|
||||
; NOCMOV: jp
|
||||
; NOCMOV: jne
|
||||
; NOCMOV-NEXT: jp
|
||||
define float @test_zext_fcmp_une(float %a, float %b) #0 {
|
||||
entry:
|
||||
%cmp = fcmp une float %a, %b
|
||||
@ -242,17 +207,14 @@ entry:
|
||||
; CHECK-LABEL: test_zext_fcmp_oeq:
|
||||
; CMOV-NEXT: ucomiss %xmm1, %xmm0
|
||||
; CMOV-NEXT: xorps %xmm0, %xmm0
|
||||
; CMOV-NEXT: xorps %xmm1, %xmm1
|
||||
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm1
|
||||
; CMOV-NEXT: [[TBB1]]:
|
||||
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: movaps %xmm1, %xmm0
|
||||
; CMOV-NEXT: [[TBB2]]:
|
||||
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
|
||||
; CMOV-NEXT: jp [[TBB]]
|
||||
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
|
||||
; CMOV-NEXT: [[TBB]]:
|
||||
; CMOV-NEXT: retq
|
||||
|
||||
; NOCMOV: jne
|
||||
; NOCMOV: jp
|
||||
; NOCMOV: jne
|
||||
; NOCMOV-NEXT: jp
|
||||
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
|
||||
entry:
|
||||
%cmp = fcmp oeq float %a, %b
|
||||
|
Loading…
x
Reference in New Issue
Block a user