mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
implement an optimization to codegen c ? 1.0 : 2.0 as load { 2.0, 1.0 } + c*4.
For 2009-03-07-FPConstSelect.ll we now produce: _f: xorl %eax, %eax testl %edi, %edi movl $4, %ecx cmovne %rax, %rcx leaq LCPI1_0(%rip), %rax movss (%rcx,%rax), %xmm0 ret previously we produced: _f: subl $4, %esp cmpl $0, 8(%esp) movss LCPI1_0, %xmm0 je LBB1_2 ## entry LBB1_1: ## entry movss LCPI1_1, %xmm0 LBB1_2: ## entry movss %xmm0, (%esp) flds (%esp) addl $4, %esp ret on PPC the code also improves to: _f: cntlzw r2, r3 srwi r2, r2, 5 li r3, lo16(LCPI1_0) slwi r2, r2, 2 addis r3, r3, ha16(LCPI1_0) lfsx f1, r3, r2 blr from: _f: li r2, lo16(LCPI1_1) cmplwi cr0, r3, 0 addis r2, r2, ha16(LCPI1_1) beq cr0, LBB1_2 ; entry LBB1_1: ; entry li r2, lo16(LCPI1_0) addis r2, r2, ha16(LCPI1_0) LBB1_2: ; entry lfs f1, 0(r2) blr This also improves the existing pic-cpool case from: foo: subl $12, %esp call .Lllvm$1.$piclabel .Lllvm$1.$piclabel: popl %eax addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax cmpl $0, 16(%esp) movsd .LCPI1_0@GOTOFF(%eax), %xmm0 je .LBB1_2 # entry .LBB1_1: # entry movsd .LCPI1_1@GOTOFF(%eax), %xmm0 .LBB1_2: # entry movsd %xmm0, (%esp) fldl (%esp) addl $12, %esp ret to: foo: call .Lllvm$1.$piclabel .Lllvm$1.$piclabel: popl %eax addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax xorl %ecx, %ecx cmpl $0, 4(%esp) movl $8, %edx cmovne %ecx, %edx fldl .LCPI1_0@GOTOFF(%eax,%edx) ret This triggers a few dozen times in spec FP 2000. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66358 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d2e936a513
commit
476769498e
@ -14,8 +14,10 @@
|
||||
|
||||
#define DEBUG_TYPE "dagcombine"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetFrameInfo.h"
|
||||
@ -2890,8 +2892,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
|
||||
return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
|
||||
N0.getOperand(0), N0.getOperand(1),
|
||||
N1, N2, N0.getOperand(2));
|
||||
else
|
||||
return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
|
||||
return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -5674,9 +5675,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
|
||||
/// where 'cond' is the comparison specified by CC.
|
||||
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
|
||||
SDValue N2, SDValue N3,
|
||||
ISD::CondCode CC, bool NotExtCompare) {
|
||||
// (x ? y : y) -> y.
|
||||
if (N2 == N3) return N2;
|
||||
|
||||
MVT VT = N2.getValueType();
|
||||
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
|
||||
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
|
||||
@ -5713,6 +5719,51 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
|
||||
}
|
||||
}
|
||||
|
||||
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
|
||||
// where "tmp" is a constant pool entry containing an array with 2.0 then 1.0
|
||||
// in it. This is a win when the constant is not otherwise available because
|
||||
// it replaces two constant pool loads with one. We only do this if the FP
|
||||
// type is known to be legal, because if it isn't, then we are before legalize
|
||||
types and we want the other legalization to happen first (e.g. to avoid
|
||||
// messing with soft float).
|
||||
if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
|
||||
if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
|
||||
if (TLI.isTypeLegal(N2.getValueType()) &&
|
||||
// If both constants have multiple uses, then we won't need to do an
|
||||
// extra load, they are likely around in registers for other users.
|
||||
(TV->hasOneUse() || FV->hasOneUse())) {
|
||||
Constant *Elts[] = {
|
||||
const_cast<ConstantFP*>(FV->getConstantFPValue()),
|
||||
const_cast<ConstantFP*>(TV->getConstantFPValue())
|
||||
};
|
||||
// Create a ConstantArray of the two constants.
|
||||
Constant *CA =
|
||||
ConstantArray::get(ArrayType::get(Elts[0]->getType(), 2), Elts, 2);
|
||||
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy());
|
||||
unsigned Alignment =
|
||||
1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
|
||||
|
||||
// Get the offsets to the 0 and 1 element of the array so that we can
|
||||
// select between them.
|
||||
SDValue Zero = DAG.getIntPtrConstant(0);
|
||||
unsigned EltSize =
|
||||
(unsigned)TLI.getTargetData()->getTypePaddedSize(Elts[0]->getType());
|
||||
SDValue One = DAG.getIntPtrConstant(EltSize);
|
||||
|
||||
SDValue Cond = DAG.getSetCC(DL,
|
||||
TLI.getSetCCResultType(N0.getValueType()),
|
||||
N0, N1, CC);
|
||||
SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
|
||||
Cond, One, Zero);
|
||||
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
|
||||
CstOffset);
|
||||
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
|
||||
PseudoSourceValue::getConstantPool(), 0, false,
|
||||
Alignment);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Check to see if we can perform the "gzip trick", transforming
|
||||
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
|
||||
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
|
||||
|
12
test/CodeGen/X86/2009-03-07-FPConstSelect.ll
Normal file
12
test/CodeGen/X86/2009-03-07-FPConstSelect.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
|
||||
; This should do a single load into the fp stack for the return, not diddle with xmm registers.
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i386-apple-darwin7"
|
||||
|
||||
define float @f(i32 %x) nounwind readnone {
|
||||
entry:
|
||||
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
|
||||
%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01
|
||||
ret float %iftmp.0.0
|
||||
}
|
@ -2,10 +2,10 @@
|
||||
; RUN: -o %t -f
|
||||
; RUN: grep _GLOBAL_OFFSET_TABLE_ %t
|
||||
; RUN: grep piclabel %t | count 3
|
||||
; RUN: grep GOTOFF %t | count 2
|
||||
; RUN: grep CPI %t | count 4
|
||||
; RUN: grep GOTOFF %t | count 1
|
||||
; RUN: grep CPI %t | count 2
|
||||
|
||||
define double @foo(i32 %a.u) {
|
||||
define double @foo(i32 %a.u) nounwind {
|
||||
entry:
|
||||
%tmp = icmp eq i32 %a.u,0
|
||||
%retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
|
||||
|
Loading…
Reference in New Issue
Block a user