2005-10-18 00:28:58 +00:00
|
|
|
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
|
2005-08-16 17:14:42 +00:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file was developed by Chris Lattner and is distributed under
|
|
|
|
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2005-10-16 05:39:50 +00:00
|
|
|
// This file implements the PPCTargetLowering class.
|
2005-08-16 17:14:42 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-10-14 23:59:06 +00:00
|
|
|
#include "PPCISelLowering.h"
|
|
|
|
#include "PPCTargetMachine.h"
|
2006-02-01 07:19:44 +00:00
|
|
|
#include "llvm/ADT/VectorExtras.h"
|
2006-03-13 23:20:37 +00:00
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
2005-08-16 17:14:42 +00:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2005-08-26 21:23:58 +00:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2005-08-16 17:14:42 +00:00
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2005-09-13 19:33:40 +00:00
|
|
|
#include "llvm/CodeGen/SSARegMap.h"
|
2005-08-26 17:36:52 +00:00
|
|
|
#include "llvm/Constants.h"
|
2005-08-16 17:14:42 +00:00
|
|
|
#include "llvm/Function.h"
|
2006-02-01 07:19:44 +00:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2006-02-18 00:08:58 +00:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2005-08-16 17:14:42 +00:00
|
|
|
using namespace llvm;
|
|
|
|
|
2005-10-16 05:39:50 +00:00
|
|
|
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  // Fetch the subtarget once; all feature queries below go through it.
  const PPCSubtarget &Subtarget = TM.getSubtarget<PPCSubtarget>();

  // Try to fold setcc operations away whenever that is possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Emit calls to _setjmp/_longjmp in place of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Register classes for the scalar types that are always present.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // Floating point constants of either width are expanded.
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // The block memory operations have no PowerPC intrinsic form.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // A sign-extending load exists for i16, but not for i8 or i1.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // There are no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // sin, cos and fmod are unsupported for both FP types.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Square root stays as-is only when the subtarget reports FSQRT support;
  // otherwise it is expanded like the other math operations above.
  if (!Subtarget.hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // BSWAP, CTPOP and CTTZ are not available.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);

  // Neither is ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);

  // There is no plain SELECT for any of the scalar types.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // select_cc on FP values is custom lowered so it can become fsel when
  // possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // Integer setcc gets custom treatment too: for example "seteq X, 13" can be
  // emitted as addi/cntlzw/srwi rather than cmpwi/mfcr/rlwinm.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // BRCOND, which requires SetCC, is not available.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // FP_TO_SINT is turned into FCTIWZ plus some loads/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // There is no direct [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // Truncating stores of i1 do not exist.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // Line number information is label based.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget.isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // GlobalAddress and ConstantPool nodes are legalized into the instructions
  // that materialize the corresponding address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

  // RET is custom lowered in order to satisfy the ABI.
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART is custom lowered so it can use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Everything below uses the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (!Subtarget.is64Bit()) {
    // 32-bit implementations have no FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  } else {
    // 64-bit implementations also have instructions that convert between i64
    // and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // Promote i32 FP_TO_UINT so that it can use the i64 FP_TO_SINT above.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  if (!Subtarget.has64BitRegs()) {
    // With only 32-bit registers, i64 shifts are expanded by this target.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  } else {
    // With 64-bit registers, i64 is supported directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR has no native form and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  }

  // Start out with every vector operation on every vector type expanded; the
  // ones that can be codegen'd effectively are re-enabled selectively below.
  for (unsigned Ty = (unsigned)MVT::Vector + 1,
                TyEnd = (unsigned)MVT::LAST_VALUETYPE;
       Ty != TyEnd; ++Ty) {
    const MVT::ValueType VecVT = (MVT::ValueType)Ty;
    setOperationAction(ISD::ADD, VecVT, Expand);
    setOperationAction(ISD::SUB, VecVT, Expand);
    setOperationAction(ISD::MUL, VecVT, Expand);
    setOperationAction(ISD::LOAD, VecVT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VecVT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VecVT, Expand);
    setOperationAction(ISD::BUILD_VECTOR, VecVT, Expand);
  }

  if (Subtarget.hasAltivec()) {
    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);

    // Operations that are directly legal on vector registers.
    setOperationAction(ISD::ADD, MVT::v4f32, Legal);
    setOperationAction(ISD::SUB, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v16i8, Legal);

    // Operations that go through LowerOperation.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // Nodes for which this target supplies its own dag combine patterns.
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();
}
|
|
|
|
|
2006-01-09 23:52:17 +00:00
|
|
|
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|
|
|
switch (Opcode) {
|
|
|
|
default: return 0;
|
|
|
|
case PPCISD::FSEL: return "PPCISD::FSEL";
|
|
|
|
case PPCISD::FCFID: return "PPCISD::FCFID";
|
|
|
|
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
|
|
|
|
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
|
2006-03-01 05:50:56 +00:00
|
|
|
case PPCISD::STFIWX: return "PPCISD::STFIWX";
|
2006-01-09 23:52:17 +00:00
|
|
|
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
|
|
|
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
2006-03-19 06:55:52 +00:00
|
|
|
case PPCISD::LVE_X: return "PPCISD::LVE_X";
|
2006-03-20 01:53:53 +00:00
|
|
|
case PPCISD::VPERM: return "PPCISD::VPERM";
|
2006-01-09 23:52:17 +00:00
|
|
|
case PPCISD::Hi: return "PPCISD::Hi";
|
|
|
|
case PPCISD::Lo: return "PPCISD::Lo";
|
|
|
|
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
|
|
|
case PPCISD::SRL: return "PPCISD::SRL";
|
|
|
|
case PPCISD::SRA: return "PPCISD::SRA";
|
|
|
|
case PPCISD::SHL: return "PPCISD::SHL";
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26943 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-22 05:30:33 +00:00
|
|
|
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
|
|
|
|
case PPCISD::STD_32: return "PPCISD::STD_32";
|
2006-01-28 07:33:03 +00:00
|
|
|
case PPCISD::CALL: return "PPCISD::CALL";
|
2006-01-09 23:52:17 +00:00
|
|
|
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-08-26 17:36:52 +00:00
|
|
|
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
|
|
|
|
static bool isFloatingPointZero(SDOperand Op) {
|
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
|
|
|
|
return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
|
|
|
|
else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
|
|
|
|
// Maybe this has already been legalized into the constant pool?
|
|
|
|
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
|
|
|
|
if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
|
|
|
|
return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-03-20 06:33:01 +00:00
|
|
|
|
|
|
|
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
|
|
|
|
/// specifies a splat of a single element that is suitable for input to
|
|
|
|
/// VSPLTB/VSPLTH/VSPLTW.
|
|
|
|
bool PPC::isSplatShuffleMask(SDNode *N) {
|
|
|
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
2006-03-20 06:51:10 +00:00
|
|
|
|
|
|
|
// We can only splat 8-bit, 16-bit, and 32-bit quantities.
|
|
|
|
if (N->getNumOperands() != 4 && N->getNumOperands() != 8 &&
|
|
|
|
N->getNumOperands() != 16)
|
|
|
|
return false;
|
|
|
|
|
2006-03-20 06:37:44 +00:00
|
|
|
// This is a splat operation if each element of the permute is the same, and
|
|
|
|
// if the value doesn't reference the second vector.
|
|
|
|
SDOperand Elt = N->getOperand(0);
|
|
|
|
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
|
|
|
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
|
|
|
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
|
|
|
"Invalid VECTOR_SHUFFLE mask!");
|
|
|
|
if (N->getOperand(i) != Elt) return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure it is a splat of the first vector operand.
|
|
|
|
return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
|
2006-03-20 06:33:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
|
|
|
|
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
|
|
|
|
unsigned PPC::getVSPLTImmediate(SDNode *N) {
|
|
|
|
assert(isSplatShuffleMask(N));
|
2006-03-20 06:51:10 +00:00
|
|
|
return cast<ConstantSDNode>(N->getOperand(0))->getValue();
|
2006-03-20 06:33:01 +00:00
|
|
|
}
|
|
|
|
|
2006-03-24 07:48:08 +00:00
|
|
|
/// isZeroVector - Return true if this build_vector is an all-zero vector.
|
|
|
|
///
|
|
|
|
bool PPC::isZeroVector(SDNode *N) {
|
|
|
|
if (MVT::isInteger(N->getOperand(0).getValueType())) {
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
|
|
|
if (!isa<ConstantSDNode>(N->getOperand(i)) ||
|
|
|
|
cast<ConstantSDNode>(N->getOperand(i))->getValue() != 0)
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
assert(MVT::isFloatingPoint(N->getOperand(0).getValueType()) &&
|
|
|
|
"Vector of non-int, non-float values?");
|
|
|
|
// See if this is all zeros.
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
|
|
|
if (!isa<ConstantFPSDNode>(N->getOperand(i)) ||
|
|
|
|
!cast<ConstantFPSDNode>(N->getOperand(i))->isExactlyValue(0.0))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-03-20 06:33:01 +00:00
|
|
|
|
2005-08-26 00:52:45 +00:00
|
|
|
/// LowerOperation - Provide custom lowering hooks for some operations.
|
|
|
|
///
|
2005-10-16 05:39:50 +00:00
|
|
|
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
2005-08-26 00:52:45 +00:00
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: assert(0 && "Wasn't expecting to be able to lower this!");
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::FP_TO_SINT: {
|
2005-09-06 22:03:27 +00:00
|
|
|
assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
|
2005-10-02 06:37:13 +00:00
|
|
|
SDOperand Src = Op.getOperand(0);
|
|
|
|
if (Src.getValueType() == MVT::f32)
|
|
|
|
Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
|
|
|
|
|
2005-12-23 00:59:59 +00:00
|
|
|
SDOperand Tmp;
|
2005-09-06 22:03:27 +00:00
|
|
|
switch (Op.getValueType()) {
|
|
|
|
default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
|
|
|
|
case MVT::i32:
|
2005-12-23 00:59:59 +00:00
|
|
|
Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
|
2005-09-06 22:03:27 +00:00
|
|
|
break;
|
|
|
|
case MVT::i64:
|
2005-12-23 00:59:59 +00:00
|
|
|
Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
|
2005-09-06 22:03:27 +00:00
|
|
|
break;
|
|
|
|
}
|
2005-08-31 21:09:52 +00:00
|
|
|
|
2005-12-23 00:59:59 +00:00
|
|
|
// Convert the FP value to an int value through memory.
|
|
|
|
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
|
|
|
|
if (Op.getValueType() == MVT::i32)
|
|
|
|
Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
|
|
|
|
return Bits;
|
2005-09-06 22:03:27 +00:00
|
|
|
}
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26943 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-22 05:30:33 +00:00
|
|
|
case ISD::SINT_TO_FP:
|
|
|
|
if (Op.getOperand(0).getValueType() == MVT::i64) {
|
|
|
|
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
|
|
|
|
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
|
|
|
|
if (Op.getValueType() == MVT::f32)
|
|
|
|
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
|
|
|
return FP;
|
|
|
|
} else {
|
|
|
|
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
|
|
|
|
"Unhandled SINT_TO_FP type in custom expander!");
|
|
|
|
// Since we only generate this in 64-bit mode, we can take advantage of
|
|
|
|
// 64-bit registers. In particular, sign extend the input value into the
|
|
|
|
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
|
|
|
|
// then lfd it and fcfid it.
|
|
|
|
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
|
|
|
|
int FrameIdx = FrameInfo->CreateStackObject(8, 8);
|
|
|
|
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
|
|
|
|
|
|
|
|
SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
|
|
|
|
Op.getOperand(0));
|
|
|
|
|
|
|
|
// STD the extended value into the stack slot.
|
|
|
|
SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
|
|
|
|
DAG.getEntryNode(), Ext64, FIdx,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
// Load the value as a double.
|
|
|
|
SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
|
|
|
|
|
|
|
|
// FCFID it and return it.
|
|
|
|
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
|
|
|
|
if (Op.getValueType() == MVT::f32)
|
|
|
|
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
|
|
|
return FP;
|
|
|
|
}
|
2006-03-24 07:53:47 +00:00
|
|
|
break;
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26943 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-22 05:30:33 +00:00
|
|
|
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::SELECT_CC: {
|
2005-08-26 00:52:45 +00:00
|
|
|
// Turn FP only select_cc's into fsel instructions.
|
2005-08-31 21:09:52 +00:00
|
|
|
if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
|
|
|
|
!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
|
|
|
|
break;
|
|
|
|
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
|
|
|
|
|
|
|
|
// Cannot handle SETEQ/SETNE.
|
|
|
|
if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
|
|
|
|
|
|
|
|
MVT::ValueType ResVT = Op.getValueType();
|
|
|
|
MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
|
|
|
|
SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
|
|
|
|
SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
|
2005-08-26 00:52:45 +00:00
|
|
|
|
2005-08-31 21:09:52 +00:00
|
|
|
// If the RHS of the comparison is a 0.0, we don't need to do the
|
|
|
|
// subtraction at all.
|
|
|
|
if (isFloatingPointZero(RHS))
|
2005-08-26 00:52:45 +00:00
|
|
|
switch (CC) {
|
2006-01-18 19:42:35 +00:00
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
2005-08-26 00:52:45 +00:00
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
2005-08-31 21:09:52 +00:00
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2005-08-26 00:52:45 +00:00
|
|
|
case ISD::SETUGE:
|
|
|
|
case ISD::SETGE:
|
2005-10-25 20:54:57 +00:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
|
2005-08-31 21:09:52 +00:00
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
|
2005-08-26 00:52:45 +00:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
2005-08-31 21:09:52 +00:00
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2005-08-26 00:52:45 +00:00
|
|
|
case ISD::SETULE:
|
|
|
|
case ISD::SETLE:
|
2005-10-25 20:54:57 +00:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
|
2005-08-26 20:25:03 +00:00
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT,
|
2005-10-26 18:01:11 +00:00
|
|
|
DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
|
2005-08-26 00:52:45 +00:00
|
|
|
}
|
2005-08-31 21:09:52 +00:00
|
|
|
|
2005-10-25 20:54:57 +00:00
|
|
|
SDOperand Cmp;
|
2005-08-31 21:09:52 +00:00
|
|
|
switch (CC) {
|
2006-01-18 19:42:35 +00:00
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
2005-10-25 20:54:57 +00:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::SETUGE:
|
|
|
|
case ISD::SETGE:
|
2005-10-25 20:54:57 +00:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
2005-10-25 20:54:57 +00:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
|
2005-08-31 21:09:52 +00:00
|
|
|
case ISD::SETULE:
|
|
|
|
case ISD::SETLE:
|
2005-10-25 20:54:57 +00:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
|
2005-08-26 00:52:45 +00:00
|
|
|
}
|
2005-08-31 21:09:52 +00:00
|
|
|
break;
|
|
|
|
}
|
2005-08-31 20:23:54 +00:00
|
|
|
case ISD::SHL: {
|
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops. Note that these ops
|
|
|
|
// depend on the PPC behavior for oversized shift amounts.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
|
2005-08-31 20:23:54 +00:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
|
2005-08-31 20:23:54 +00:00
|
|
|
SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
|
2005-08-31 20:23:54 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
|
|
|
}
|
|
|
|
case ISD::SRL: {
|
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops. Note that these ops
|
|
|
|
// depend on the PPC behavior for oversized shift amounts.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
|
2005-08-31 20:23:54 +00:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
|
2005-08-31 20:23:54 +00:00
|
|
|
SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
|
2005-08-31 20:23:54 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
|
|
|
}
|
|
|
|
case ISD::SRA: {
|
2005-08-31 19:09:57 +00:00
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops, followed by a select_cc.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
|
2005-08-31 19:09:57 +00:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 02:10:38 +00:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
|
|
|
|
SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
|
2005-08-31 19:09:57 +00:00
|
|
|
SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
|
|
|
|
Tmp4, Tmp6, ISD::SETLE);
|
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
2005-08-26 00:52:45 +00:00
|
|
|
}
|
2005-12-10 02:36:00 +00:00
|
|
|
case ISD::ConstantPool: {
|
2006-01-31 22:23:14 +00:00
|
|
|
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
|
|
|
|
Constant *C = CP->get();
|
|
|
|
SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
|
2005-12-10 02:36:00 +00:00
|
|
|
SDOperand Zero = DAG.getConstant(0, MVT::i32);
|
|
|
|
|
2006-02-22 20:19:42 +00:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::Static) {
|
2005-12-10 02:36:00 +00:00
|
|
|
// Generate non-pic code that has direct accesses to the constant pool.
|
|
|
|
// The address of the constant pool entry is just (hi(&cpi)+lo(&cpi)).
|
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
|
|
|
|
return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only lower ConstantPool on Darwin.
|
|
|
|
if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
|
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
|
2006-02-22 20:19:42 +00:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
|
2005-12-10 02:36:00 +00:00
|
|
|
// With PIC, the first instruction is actually "GR+hi(&G)".
|
|
|
|
Hi = DAG.getNode(ISD::ADD, MVT::i32,
|
|
|
|
DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
|
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
|
|
|
|
Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
return Lo;
|
|
|
|
}
|
2005-11-17 07:30:41 +00:00
|
|
|
case ISD::GlobalAddress: {
|
2005-12-24 01:00:15 +00:00
|
|
|
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
|
|
|
|
GlobalValue *GV = GSDN->getGlobal();
|
|
|
|
SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
|
2005-11-17 07:30:41 +00:00
|
|
|
SDOperand Zero = DAG.getConstant(0, MVT::i32);
|
2005-11-17 18:55:48 +00:00
|
|
|
|
2006-02-22 20:19:42 +00:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::Static) {
|
2005-12-10 02:36:00 +00:00
|
|
|
// Generate non-pic code that has direct accesses to globals.
|
|
|
|
// The address of the global is just (hi(&g)+lo(&g)).
|
2005-11-17 18:55:48 +00:00
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
|
|
|
|
return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
}
|
2005-11-17 07:30:41 +00:00
|
|
|
|
2005-11-17 18:55:48 +00:00
|
|
|
// Only lower GlobalAddress on Darwin.
|
|
|
|
if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
|
2006-01-06 01:04:03 +00:00
|
|
|
|
2005-11-17 07:30:41 +00:00
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
|
2006-02-22 20:19:42 +00:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
|
2005-11-17 07:30:41 +00:00
|
|
|
// With PIC, the first instruction is actually "GR+hi(&G)".
|
|
|
|
Hi = DAG.getNode(ISD::ADD, MVT::i32,
|
2005-11-17 17:51:38 +00:00
|
|
|
DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
|
2005-11-17 07:30:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
|
|
|
|
Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
|
2006-01-29 20:49:17 +00:00
|
|
|
if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
|
|
|
|
(!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
|
2005-11-17 07:30:41 +00:00
|
|
|
return Lo;
|
|
|
|
|
|
|
|
// If the global is weak or external, we have to go through the lazy
|
|
|
|
// resolution stub.
|
|
|
|
return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
|
|
|
|
}
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25848 91177308-0d34-0410-b5e6-96231b3b80d8
2006-01-31 08:17:29 +00:00
|
|
|
case ISD::SETCC: {
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
2006-02-01 07:19:44 +00:00
|
|
|
|
|
|
|
// If we're comparing for equality to zero, expose the fact that this is
|
|
|
|
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
|
|
|
|
// fold the new nodes.
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
|
|
|
if (C->isNullValue() && CC == ISD::SETEQ) {
|
|
|
|
MVT::ValueType VT = Op.getOperand(0).getValueType();
|
|
|
|
SDOperand Zext = Op.getOperand(0);
|
|
|
|
if (VT < MVT::i32) {
|
|
|
|
VT = MVT::i32;
|
|
|
|
Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
|
|
|
|
}
|
|
|
|
unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
|
|
|
|
SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
|
|
|
|
SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
|
|
|
|
DAG.getConstant(Log2b, getShiftAmountTy()));
|
|
|
|
return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
|
|
|
|
}
|
|
|
|
// Leave comparisons against 0 and -1 alone for now, since they're usually
|
|
|
|
// optimized. FIXME: revisit this when we can custom lower all setcc
|
|
|
|
// optimizations.
|
|
|
|
if (C->isAllOnesValue() || C->isNullValue())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an integer seteq/setne, turn it into a compare against zero
|
|
|
|
// by subtracting the rhs from the lhs, which is faster than setting a
|
|
|
|
// condition register, reading it back out, and masking the correct bit.
|
|
|
|
MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
|
|
|
|
if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
|
|
|
MVT::ValueType VT = Op.getValueType();
|
|
|
|
SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
|
|
|
|
Op.getOperand(1));
|
|
|
|
return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
|
|
|
|
}
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25848 91177308-0d34-0410-b5e6-96231b3b80d8
2006-01-31 08:17:29 +00:00
|
|
|
break;
|
|
|
|
}
|
2006-01-25 18:21:52 +00:00
|
|
|
case ISD::VASTART: {
|
|
|
|
// vastart just stores the address of the VarArgsFrameIndex slot into the
|
|
|
|
// memory location argument.
|
|
|
|
// FIXME: Replace MVT::i32 with PointerTy
|
|
|
|
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
|
|
|
|
return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
|
|
|
|
Op.getOperand(1), Op.getOperand(2));
|
|
|
|
}
|
2006-01-27 21:09:22 +00:00
|
|
|
case ISD::RET: {
|
|
|
|
SDOperand Copy;
|
|
|
|
|
|
|
|
switch(Op.getNumOperands()) {
|
|
|
|
default:
|
|
|
|
assert(0 && "Do not know how to return this many arguments!");
|
|
|
|
abort();
|
|
|
|
case 1:
|
|
|
|
return SDOperand(); // ret void is legal
|
|
|
|
case 2: {
|
|
|
|
MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
|
|
|
|
unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
|
|
|
|
Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
|
|
|
|
SDOperand());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 3:
|
|
|
|
Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
|
|
|
|
SDOperand());
|
|
|
|
Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
|
|
|
|
}
|
2006-03-19 06:55:52 +00:00
|
|
|
case ISD::SCALAR_TO_VECTOR: {
|
|
|
|
// Create a stack slot that is 16-byte aligned.
|
|
|
|
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
|
|
|
|
int FrameIdx = FrameInfo->CreateStackObject(16, 16);
|
|
|
|
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
|
|
|
|
|
|
|
|
// Store the input value into Value#0 of the stack slot.
|
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
|
|
|
|
Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
|
2006-03-20 22:37:23 +00:00
|
|
|
// LVE_X it out.
|
2006-03-19 06:55:52 +00:00
|
|
|
return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
}
|
2006-03-24 07:48:08 +00:00
|
|
|
case ISD::BUILD_VECTOR:
|
|
|
|
// If this is a case we can't handle, return null and let the default
|
|
|
|
// expansion code take care of it. If we CAN select this case, return Op.
|
|
|
|
|
|
|
|
// See if this is all zeros.
|
|
|
|
// FIXME: We should handle splat(-0.0), and other cases here.
|
|
|
|
if (PPC::isZeroVector(Op.Val))
|
|
|
|
return Op;
|
|
|
|
return SDOperand();
|
|
|
|
|
2006-03-20 01:53:53 +00:00
|
|
|
case ISD::VECTOR_SHUFFLE: {
|
2006-03-20 06:51:10 +00:00
|
|
|
SDOperand V1 = Op.getOperand(0);
|
|
|
|
SDOperand V2 = Op.getOperand(1);
|
|
|
|
SDOperand PermMask = Op.getOperand(2);
|
|
|
|
|
|
|
|
// Cases that are handled by instructions that take permute immediates
|
|
|
|
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
|
|
|
|
// selected by the instruction selector.
|
|
|
|
if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// TODO: Handle more cases, and also handle cases that are cheaper to do as
|
|
|
|
// multiple such instructions than as a constant pool load/vperm pair.
|
2006-03-20 01:53:53 +00:00
|
|
|
|
|
|
|
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
|
|
|
|
// vector that will get spilled to the constant pool.
|
|
|
|
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
|
|
|
|
|
|
|
|
// The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
|
|
|
|
// that it is in input element units, not in bytes. Convert now.
|
|
|
|
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
|
|
|
|
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
|
|
|
|
|
|
|
|
std::vector<SDOperand> ResultMask;
|
|
|
|
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
|
|
|
|
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
|
|
|
|
|
|
|
|
for (unsigned j = 0; j != BytesPerElement; ++j)
|
|
|
|
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
|
|
|
MVT::i8));
|
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
|
|
|
|
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
|
|
|
|
}
|
2005-08-31 20:23:54 +00:00
|
|
|
}
|
2005-08-26 00:52:45 +00:00
|
|
|
return SDOperand();
|
|
|
|
}
|
|
|
|
|
2005-08-16 17:14:42 +00:00
|
|
|
std::vector<SDOperand>
|
2005-10-16 05:39:50 +00:00
|
|
|
PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
|
2005-08-16 17:14:42 +00:00
|
|
|
//
|
|
|
|
// add beautiful description of PPC stack frame format, or at least some docs
|
|
|
|
//
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
|
|
|
MachineBasicBlock& BB = MF.front();
|
2005-09-13 19:33:40 +00:00
|
|
|
SSARegMap *RegMap = MF.getSSARegMap();
|
2005-08-16 17:14:42 +00:00
|
|
|
std::vector<SDOperand> ArgValues;
|
|
|
|
|
|
|
|
unsigned ArgOffset = 24;
|
|
|
|
unsigned GPR_remaining = 8;
|
|
|
|
unsigned FPR_remaining = 13;
|
|
|
|
unsigned GPR_idx = 0, FPR_idx = 0;
|
|
|
|
static const unsigned GPR[] = {
|
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
|
|
|
static const unsigned FPR[] = {
|
|
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
|
|
|
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
|
|
|
|
};
|
|
|
|
|
|
|
|
// Add DAG nodes to load the arguments... On entry to a function on PPC,
|
|
|
|
// the arguments start at offset 24, although they are likely to be passed
|
|
|
|
// in registers.
|
|
|
|
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
|
|
|
|
SDOperand newroot, argt;
|
|
|
|
unsigned ObjSize;
|
|
|
|
bool needsLoad = false;
|
|
|
|
bool ArgLive = !I->use_empty();
|
|
|
|
MVT::ValueType ObjectVT = getValueType(I->getType());
|
|
|
|
|
|
|
|
switch (ObjectVT) {
|
2005-08-30 00:19:00 +00:00
|
|
|
default: assert(0 && "Unhandled argument type!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
ObjSize = 4;
|
|
|
|
if (!ArgLive) break;
|
|
|
|
if (GPR_remaining > 0) {
|
2005-10-18 00:28:58 +00:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-13 19:33:40 +00:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-31 01:58:39 +00:00
|
|
|
if (ObjectVT != MVT::i32) {
|
|
|
|
unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
|
|
|
|
: ISD::AssertZext;
|
|
|
|
argt = DAG.getNode(AssertOp, MVT::i32, argt,
|
|
|
|
DAG.getValueType(ObjectVT));
|
|
|
|
argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
|
|
|
|
}
|
2005-08-30 00:19:00 +00:00
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
2005-11-30 20:40:54 +00:00
|
|
|
case MVT::i64:
|
|
|
|
ObjSize = 8;
|
2005-08-30 00:19:00 +00:00
|
|
|
if (!ArgLive) break;
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
SDOperand argHi, argLo;
|
2005-10-18 00:28:58 +00:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-13 19:33:40 +00:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-30 00:19:00 +00:00
|
|
|
// If we have two or more remaining argument registers, then both halves
|
|
|
|
// of the i64 can be sourced from there. Otherwise, the lower half will
|
|
|
|
// have to come off the stack. This can happen when an i64 is preceded
|
|
|
|
// by 28 bytes of arguments.
|
|
|
|
if (GPR_remaining > 1) {
|
2005-10-18 00:28:58 +00:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-13 19:33:40 +00:00
|
|
|
MF.addLiveIn(GPR[GPR_idx+1], VReg);
|
|
|
|
argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
|
2005-08-30 00:19:00 +00:00
|
|
|
} else {
|
|
|
|
int FI = MFI->CreateFixedObject(4, ArgOffset+4);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
|
|
argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
}
|
|
|
|
// Build the outgoing arg thingy
|
|
|
|
argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
|
|
|
|
newroot = argLo;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
|
2006-01-11 18:21:25 +00:00
|
|
|
if (!ArgLive) {
|
|
|
|
if (FPR_remaining > 0) {
|
|
|
|
--FPR_remaining;
|
|
|
|
++FPR_idx;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2005-08-30 00:19:00 +00:00
|
|
|
if (FPR_remaining > 0) {
|
2005-10-01 01:35:02 +00:00
|
|
|
unsigned VReg;
|
|
|
|
if (ObjectVT == MVT::f32)
|
2005-10-18 00:28:58 +00:00
|
|
|
VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
|
2005-10-01 01:35:02 +00:00
|
|
|
else
|
2005-10-18 00:28:58 +00:00
|
|
|
VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
|
2005-09-13 19:33:40 +00:00
|
|
|
MF.addLiveIn(FPR[FPR_idx], VReg);
|
|
|
|
argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
|
2005-08-30 00:19:00 +00:00
|
|
|
--FPR_remaining;
|
|
|
|
++FPR_idx;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// We need to load the argument to a virtual register if we determined above
|
|
|
|
// that we ran out of physical registers of the appropriate type
|
|
|
|
if (needsLoad) {
|
|
|
|
unsigned SubregOffset = 0;
|
|
|
|
if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
|
|
|
|
if (ObjectVT == MVT::i16) SubregOffset = 2;
|
|
|
|
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
|
|
|
|
DAG.getConstant(SubregOffset, MVT::i32));
|
|
|
|
argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Every 4 bytes of argument space consumes one of the GPRs available for
|
|
|
|
// argument passing.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
|
|
|
|
GPR_remaining -= delta;
|
|
|
|
GPR_idx += delta;
|
|
|
|
}
|
|
|
|
ArgOffset += ObjSize;
|
|
|
|
if (newroot.Val)
|
|
|
|
DAG.setRoot(newroot.getValue(1));
|
|
|
|
|
|
|
|
ArgValues.push_back(argt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the function takes variable number of arguments, make a frame index for
|
|
|
|
// the start of the first vararg value... for expansion of llvm.va_start.
|
|
|
|
if (F.isVarArg()) {
|
|
|
|
VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
|
|
|
|
// If this function is vararg, store any remaining integer argument regs
|
|
|
|
// to their spots on the stack so that they may be loaded by deferencing the
|
|
|
|
// result of va_next.
|
|
|
|
std::vector<SDOperand> MemOps;
|
|
|
|
for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
|
2005-10-18 00:28:58 +00:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-13 19:33:40 +00:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-16 17:14:42 +00:00
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
|
|
|
|
Val, FIN, DAG.getSrcValue(NULL));
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by four for the next argument to store
|
|
|
|
SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
|
|
|
|
FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
|
|
|
|
}
|
2005-11-30 20:40:54 +00:00
|
|
|
if (!MemOps.empty()) {
|
|
|
|
MemOps.push_back(DAG.getRoot());
|
|
|
|
DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
|
|
|
|
}
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Finally, inform the code generator which regs we return values in.
|
|
|
|
switch (getValueType(F.getReturnType())) {
|
|
|
|
default: assert(0 && "Unknown type!");
|
|
|
|
case MVT::isVoid: break;
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
MF.addLiveOut(PPC::R3);
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
MF.addLiveOut(PPC::R3);
|
|
|
|
MF.addLiveOut(PPC::R4);
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
MF.addLiveOut(PPC::F1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ArgValues;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::pair<SDOperand, SDOperand>
|
2005-10-16 05:39:50 +00:00
|
|
|
PPCTargetLowering::LowerCallTo(SDOperand Chain,
|
|
|
|
const Type *RetTy, bool isVarArg,
|
|
|
|
unsigned CallingConv, bool isTailCall,
|
|
|
|
SDOperand Callee, ArgListTy &Args,
|
|
|
|
SelectionDAG &DAG) {
|
2006-01-27 23:34:02 +00:00
|
|
|
// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
|
2005-08-16 17:14:42 +00:00
|
|
|
// SelectExpr to use to put the arguments in the appropriate registers.
|
|
|
|
std::vector<SDOperand> args_to_use;
|
|
|
|
|
|
|
|
// Count how many bytes are to be pushed on the stack, including the linkage
|
|
|
|
// area, and parameter passing area.
|
|
|
|
unsigned NumBytes = 24;
|
|
|
|
|
|
|
|
if (Args.empty()) {
|
2006-02-13 08:55:29 +00:00
|
|
|
Chain = DAG.getCALLSEQ_START(Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-08-16 17:14:42 +00:00
|
|
|
} else {
|
2005-08-30 00:19:00 +00:00
|
|
|
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
2005-08-16 17:14:42 +00:00
|
|
|
switch (getValueType(Args[i].second)) {
|
2005-08-30 00:19:00 +00:00
|
|
|
default: assert(0 && "Unknown value type!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
case MVT::f32:
|
|
|
|
NumBytes += 4;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
case MVT::f64:
|
|
|
|
NumBytes += 8;
|
|
|
|
break;
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
2005-08-30 00:19:00 +00:00
|
|
|
}
|
2005-08-16 17:14:42 +00:00
|
|
|
|
2005-08-30 00:19:00 +00:00
|
|
|
// Just to be safe, we'll always reserve the full 24 bytes of linkage area
|
|
|
|
// plus 32 bytes of argument space in case any called code gets funky on us.
|
|
|
|
// (Required by ABI to support var arg)
|
|
|
|
if (NumBytes < 56) NumBytes = 56;
|
2005-08-16 17:14:42 +00:00
|
|
|
|
|
|
|
// Adjust the stack pointer for the new arguments...
|
|
|
|
// These operations are automatically eliminated by the prolog/epilog pass
|
2006-02-13 08:55:29 +00:00
|
|
|
Chain = DAG.getCALLSEQ_START(Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-08-16 17:14:42 +00:00
|
|
|
|
|
|
|
// Set up a copy of the stack pointer for use loading and storing any
|
|
|
|
// arguments that may not fit in the registers available for argument
|
|
|
|
// passing.
|
Fix calls that need to store values in stack slots, to not copy the stack
pointer. This allows us to emit stuff like this:
li r10, 0
stw r10, 56(r1)
or r3, r10, r10
or r4, r10, r10
or r5, r10, r10
or r6, r10, r10
or r7, r10, r10
or r8, r10, r10
or r9, r10, r10
bl L_bar$stub
instead of this:
or r2, r1, r1 ;; Extraneous copy.
li r10, 0
stw r10, 56(r2)
or r3, r10, r10
or r4, r10, r10
or r5, r10, r10
or r6, r10, r10
or r7, r10, r10
or r8, r10, r10
or r9, r10, r10
bl L_bar$stub
wowness.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25221 91177308-0d34-0410-b5e6-96231b3b80d8
2006-01-11 19:55:07 +00:00
|
|
|
SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
2005-08-16 17:14:42 +00:00
|
|
|
|
|
|
|
// Figure out which arguments are going to go in registers, and which in
|
|
|
|
// memory. Also, if this is a vararg function, floating point operations
|
|
|
|
// must be stored to our stack, and loaded into integer regs as well, if
|
|
|
|
// any integer regs are available for argument passing.
|
|
|
|
unsigned ArgOffset = 24;
|
|
|
|
unsigned GPR_remaining = 8;
|
|
|
|
unsigned FPR_remaining = 13;
|
|
|
|
|
|
|
|
std::vector<SDOperand> MemOps;
|
|
|
|
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
|
|
|
// PtrOff will be used to store the current argument to the stack if a
|
|
|
|
// register cannot be found for it.
|
|
|
|
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
|
|
|
|
MVT::ValueType ArgVT = getValueType(Args[i].second);
|
|
|
|
|
|
|
|
switch (ArgVT) {
|
2005-08-30 00:19:00 +00:00
|
|
|
default: assert(0 && "Unexpected ValueType for argument!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
// Promote the integer to 32 bits. If the input type is signed use a
|
|
|
|
// sign extend, otherwise use a zero extend.
|
|
|
|
if (Args[i].second->isSigned())
|
|
|
|
Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
|
|
|
|
else
|
|
|
|
Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
|
|
|
|
// FALL THROUGH
|
|
|
|
case MVT::i32:
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(Args[i].first);
|
|
|
|
--GPR_remaining;
|
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += 4;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
// If we have one free GPR left, we can place the upper half of the i64
|
|
|
|
// in it, and store the other half to the stack. If we have two or more
|
|
|
|
// free GPRs, then we can pass both halves of the i64 in registers.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
|
|
|
|
Args[i].first, DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
|
|
|
|
Args[i].first, DAG.getConstant(0, MVT::i32));
|
|
|
|
args_to_use.push_back(Hi);
|
|
|
|
--GPR_remaining;
|
2005-08-16 17:14:42 +00:00
|
|
|
if (GPR_remaining > 0) {
|
2005-08-30 00:19:00 +00:00
|
|
|
args_to_use.push_back(Lo);
|
2005-08-16 17:14:42 +00:00
|
|
|
--GPR_remaining;
|
|
|
|
} else {
|
2005-08-30 00:19:00 +00:00
|
|
|
SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
|
2005-08-16 17:14:42 +00:00
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
2005-08-30 00:19:00 +00:00
|
|
|
Lo, PtrOff, DAG.getSrcValue(NULL)));
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
2005-08-30 00:19:00 +00:00
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += 8;
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
if (FPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(Args[i].first);
|
|
|
|
--FPR_remaining;
|
|
|
|
if (isVarArg) {
|
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Float varargs are always shadowed in available integer registers
|
2005-08-16 17:14:42 +00:00
|
|
|
if (GPR_remaining > 0) {
|
2005-08-30 00:19:00 +00:00
|
|
|
SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
2005-11-17 18:30:17 +00:00
|
|
|
MemOps.push_back(Load.getValue(1));
|
2005-08-30 00:19:00 +00:00
|
|
|
args_to_use.push_back(Load);
|
2005-08-16 17:14:42 +00:00
|
|
|
--GPR_remaining;
|
2005-08-30 00:19:00 +00:00
|
|
|
}
|
|
|
|
if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
|
2005-08-16 17:14:42 +00:00
|
|
|
SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
|
2005-08-30 00:19:00 +00:00
|
|
|
SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
2005-11-17 18:30:17 +00:00
|
|
|
MemOps.push_back(Load.getValue(1));
|
2005-08-30 00:19:00 +00:00
|
|
|
args_to_use.push_back(Load);
|
|
|
|
--GPR_remaining;
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
} else {
|
2005-08-30 00:19:00 +00:00
|
|
|
// If we have any FPRs remaining, we may also have GPRs remaining.
|
|
|
|
// Args passed in FPRs consume either 1 (f32) or 2 (f64) available
|
|
|
|
// GPRs.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
|
|
|
|
--GPR_remaining;
|
|
|
|
}
|
|
|
|
if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
|
|
|
|
args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
|
|
|
|
--GPR_remaining;
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
}
|
2005-08-30 00:19:00 +00:00
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
|
|
|
|
break;
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!MemOps.empty())
|
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<MVT::ValueType> RetVals;
|
|
|
|
MVT::ValueType RetTyVT = getValueType(RetTy);
|
2005-09-02 01:24:55 +00:00
|
|
|
MVT::ValueType ActualRetTyVT = RetTyVT;
|
|
|
|
if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
|
|
|
|
ActualRetTyVT = MVT::i32; // Promote result to i32.
|
|
|
|
|
2006-01-28 07:33:03 +00:00
|
|
|
if (RetTyVT == MVT::i64) {
|
|
|
|
RetVals.push_back(MVT::i32);
|
|
|
|
RetVals.push_back(MVT::i32);
|
|
|
|
} else if (RetTyVT != MVT::isVoid) {
|
2005-09-02 01:24:55 +00:00
|
|
|
RetVals.push_back(ActualRetTyVT);
|
2006-01-28 07:33:03 +00:00
|
|
|
}
|
2005-08-16 17:14:42 +00:00
|
|
|
RetVals.push_back(MVT::Other);
|
|
|
|
|
2005-11-17 05:56:14 +00:00
|
|
|
// If the callee is a GlobalAddress node (quite common, every direct call is)
|
|
|
|
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
|
|
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
|
|
|
|
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
|
|
|
|
|
2006-01-27 23:34:02 +00:00
|
|
|
std::vector<SDOperand> Ops;
|
|
|
|
Ops.push_back(Chain);
|
|
|
|
Ops.push_back(Callee);
|
|
|
|
Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
|
|
|
|
SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
|
2006-01-28 07:33:03 +00:00
|
|
|
Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
|
2005-08-16 17:14:42 +00:00
|
|
|
Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-09-02 01:24:55 +00:00
|
|
|
SDOperand RetVal = TheCall;
|
|
|
|
|
|
|
|
// If the result is a small value, add a note so that we keep track of the
|
|
|
|
// information about whether it is sign or zero extended.
|
|
|
|
if (RetTyVT != ActualRetTyVT) {
|
|
|
|
RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
|
|
|
|
MVT::i32, RetVal, DAG.getValueType(RetTyVT));
|
|
|
|
RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
|
2006-01-28 07:33:03 +00:00
|
|
|
} else if (RetTyVT == MVT::i64) {
|
|
|
|
RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
|
2005-09-02 01:24:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(RetVal, Chain);
|
2005-08-16 17:14:42 +00:00
|
|
|
}
|
|
|
|
|
2005-08-26 21:23:58 +00:00
|
|
|
/// InsertAtEndOfBasicBlock - Expand a SELECT_CC_Int / SELECT_CC_F4 /
/// SELECT_CC_F8 pseudo instruction into the diamond control-flow pattern that
/// implements it.
///
/// The pseudo's operands are read as: #0 the destination vreg, #1 the
/// condition register to branch on, #2 the value selected when the branch is
/// taken, #3 the value selected on the fall-through path, and #4 the branch
/// opcode to use.
///
/// Two new blocks are created and inserted right after BB:
///
///   BB:        ...
///              bCC JoinMBB
///              (falls through to FalseMBB)
///   FalseMBB:  (falls through to JoinMBB)
///   JoinMBB:   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, BB ]
///
/// MI is deleted.  The returned block is JoinMBB.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8) &&
         "Unexpected instr type to insert");

  // Pull the pieces out of the pseudo instruction up front.
  const unsigned ResultReg = MI->getOperand(0).getReg();
  const unsigned CondReg   = MI->getOperand(1).getReg();
  const unsigned TrueReg   = MI->getOperand(2).getReg();
  const unsigned FalseReg  = MI->getOperand(3).getReg();
  const int BranchOpc      = MI->getOperand(4).getImmedValue();

  // The new blocks belong to the same LLVM basic block as BB and are placed
  // immediately after it in the function.
  const BasicBlock *IRBlock = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator InsertPos = BB;
  ++InsertPos;

  MachineBasicBlock *HeadMBB  = BB;
  MachineBasicBlock *FalseMBB = new MachineBasicBlock(IRBlock);
  MachineBasicBlock *JoinMBB  = new MachineBasicBlock(IRBlock);

  // Head block: conditional branch straight to the join block.
  BuildMI(HeadMBB, BranchOpc, 2).addReg(CondReg).addMBB(JoinMBB);

  MachineFunction *Fn = HeadMBB->getParent();
  Fn->getBasicBlockList().insert(InsertPos, FalseMBB);
  Fn->getBasicBlockList().insert(InsertPos, JoinMBB);

  // Update machine-CFG edges: the head reaches both new blocks, and the false
  // block only falls through to the join.
  HeadMBB->addSuccessor(FalseMBB);
  HeadMBB->addSuccessor(JoinMBB);
  FalseMBB->addSuccessor(JoinMBB);

  // Join block: merge the two candidate values into the destination vreg.
  BuildMI(JoinMBB, PPC::PHI, 4, ResultReg)
    .addReg(FalseReg).addMBB(FalseMBB)
    .addReg(TrueReg).addMBB(HeadMBB);

  delete MI;   // The pseudo instruction is gone now.
  return JoinMBB;
}
|
|
|
|
|
2006-03-01 04:57:39 +00:00
|
|
|
/// PerformDAGCombine - PPC-specific DAG combines.  Returns the replacement
/// value when one of the patterns below matches, and a default-constructed
/// SDOperand when nothing was changed.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetMachine &TM = getTargetMachine();
  const unsigned Opcode = N->getOpcode();

  if (Opcode == ISD::SINT_TO_FP) {
    // Only attempted on 64-bit subtargets.
    if (!TM.getSubtarget<PPCSubtarget>().is64Bit())
      return SDOperand();

    SDOperand IntVal = N->getOperand(0);
    if (IntVal.getOpcode() != ISD::FP_TO_SINT)
      return SDOperand();

    // (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid, with no trip through
    // memory.  X and the result may each be f32 or f64, but the integer in
    // the middle has to be i64.
    if (IntVal.getValueType() != MVT::i64) {
      // An i32 intermediate could avoid the load/store as well; that case is
      // not implemented.
      return SDOperand();
    }

    SDOperand Conv = IntVal.getOperand(0);
    if (Conv.getValueType() == MVT::f32) {
      // Widen the source so the conversion nodes operate on f64.
      Conv = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Conv);
      DCI.AddToWorklist(Conv.Val);
    }

    Conv = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Conv);
    DCI.AddToWorklist(Conv.Val);

    Conv = DAG.getNode(PPCISD::FCFID, MVT::f64, Conv);
    DCI.AddToWorklist(Conv.Val);

    if (N->getValueType(0) == MVT::f32) {
      // Narrow back down when the original node produced f32.
      Conv = DAG.getNode(ISD::FP_ROUND, MVT::f32, Conv);
      DCI.AddToWorklist(Conv.Val);
    }
    return Conv;
  }

  if (Opcode == ISD::STORE) {
    // STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)), when the subtarget has
    // stfiwx and the converted integer is i32.
    const bool Matches =
      TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
      N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
      N->getOperand(1).getValueType() == MVT::i32;
    if (!Matches)
      return SDOperand();

    SDOperand Conv = N->getOperand(1).getOperand(0);
    if (Conv.getValueType() == MVT::f32) {
      Conv = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Conv);
      DCI.AddToWorklist(Conv.Val);
    }

    Conv = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Conv);
    DCI.AddToWorklist(Conv.Val);

    // The remaining store operands (0, 2 and 3) are forwarded unchanged.
    Conv = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Conv,
                       N->getOperand(2), N->getOperand(3));
    DCI.AddToWorklist(Conv.Val);
    return Conv;
  }

  // No combine applies to this node.
  return SDOperand();
}
|
|
|
|
|
2006-02-07 20:16:30 +00:00
|
|
|
/// getConstraintType - Given a constraint letter, return the type of
|
|
|
|
/// constraint it is for this target.
|
|
|
|
PPCTargetLowering::ConstraintType
|
|
|
|
PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
|
|
|
|
switch (ConstraintLetter) {
|
|
|
|
default: break;
|
|
|
|
case 'b':
|
|
|
|
case 'r':
|
|
|
|
case 'f':
|
|
|
|
case 'v':
|
|
|
|
case 'y':
|
|
|
|
return C_RegisterClass;
|
|
|
|
}
|
|
|
|
return TargetLowering::getConstraintType(ConstraintLetter);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-01-31 19:20:21 +00:00
|
|
|
std::vector<unsigned> PPCTargetLowering::
|
2006-02-22 00:56:39 +00:00
|
|
|
getRegClassForInlineAsmConstraint(const std::string &Constraint,
|
|
|
|
MVT::ValueType VT) const {
|
2006-01-31 19:20:21 +00:00
|
|
|
if (Constraint.size() == 1) {
|
|
|
|
switch (Constraint[0]) { // GCC RS6000 Constraint Letters
|
|
|
|
default: break; // Unknown constriant letter
|
|
|
|
case 'b':
|
|
|
|
return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
|
|
|
|
PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
|
|
|
|
PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
|
|
|
|
PPC::R12, PPC::R13, PPC::R14, PPC::R15,
|
|
|
|
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
|
|
|
|
PPC::R20, PPC::R21, PPC::R22, PPC::R23,
|
|
|
|
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
|
|
|
|
PPC::R28, PPC::R29, PPC::R30, PPC::R31,
|
|
|
|
0);
|
|
|
|
case 'r':
|
|
|
|
return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
|
|
|
|
PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
|
|
|
|
PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
|
|
|
|
PPC::R12, PPC::R13, PPC::R14, PPC::R15,
|
|
|
|
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
|
|
|
|
PPC::R20, PPC::R21, PPC::R22, PPC::R23,
|
|
|
|
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
|
|
|
|
PPC::R28, PPC::R29, PPC::R30, PPC::R31,
|
|
|
|
0);
|
|
|
|
case 'f':
|
|
|
|
return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
|
|
|
|
PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
|
|
|
|
PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
|
|
|
|
PPC::F12, PPC::F13, PPC::F14, PPC::F15,
|
|
|
|
PPC::F16, PPC::F17, PPC::F18, PPC::F19,
|
|
|
|
PPC::F20, PPC::F21, PPC::F22, PPC::F23,
|
|
|
|
PPC::F24, PPC::F25, PPC::F26, PPC::F27,
|
|
|
|
PPC::F28, PPC::F29, PPC::F30, PPC::F31,
|
|
|
|
0);
|
|
|
|
case 'v':
|
|
|
|
return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
|
|
|
|
PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
|
|
|
|
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
|
|
|
|
PPC::V12, PPC::V13, PPC::V14, PPC::V15,
|
|
|
|
PPC::V16, PPC::V17, PPC::V18, PPC::V19,
|
|
|
|
PPC::V20, PPC::V21, PPC::V22, PPC::V23,
|
|
|
|
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
|
|
|
|
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
|
|
|
|
0);
|
|
|
|
case 'y':
|
|
|
|
return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
|
|
|
|
PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
|
|
|
|
0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-02-22 00:56:39 +00:00
|
|
|
return std::vector<unsigned>();
|
2006-01-31 19:20:21 +00:00
|
|
|
}
|
2006-02-07 00:47:13 +00:00
|
|
|
|
|
|
|
// isOperandValidForConstraint
|
|
|
|
bool PPCTargetLowering::
|
|
|
|
isOperandValidForConstraint(SDOperand Op, char Letter) {
|
|
|
|
switch (Letter) {
|
|
|
|
default: break;
|
|
|
|
case 'I':
|
|
|
|
case 'J':
|
|
|
|
case 'K':
|
|
|
|
case 'L':
|
|
|
|
case 'M':
|
|
|
|
case 'N':
|
|
|
|
case 'O':
|
|
|
|
case 'P': {
|
|
|
|
if (!isa<ConstantSDNode>(Op)) return false; // Must be an immediate.
|
|
|
|
unsigned Value = cast<ConstantSDNode>(Op)->getValue();
|
|
|
|
switch (Letter) {
|
|
|
|
default: assert(0 && "Unknown constraint letter!");
|
|
|
|
case 'I': // "I" is a signed 16-bit constant.
|
|
|
|
return (short)Value == (int)Value;
|
|
|
|
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
|
|
|
|
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
|
|
|
|
return (short)Value == 0;
|
|
|
|
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
|
|
|
|
return (Value >> 16) == 0;
|
|
|
|
case 'M': // "M" is a constant that is greater than 31.
|
|
|
|
return Value > 31;
|
|
|
|
case 'N': // "N" is a positive constant that is an exact power of two.
|
|
|
|
return (int)Value > 0 && isPowerOf2_32(Value);
|
|
|
|
case 'O': // "O" is the constant zero.
|
|
|
|
return Value == 0;
|
|
|
|
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
|
|
|
|
return (short)-Value == (int)-Value;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle standard constraint letters.
|
|
|
|
return TargetLowering::isOperandValidForConstraint(Op, Letter);
|
|
|
|
}
|
2006-03-13 23:20:37 +00:00
|
|
|
|
|
|
|
/// isLegalAddressImmediate - Return true if the integer value can be used
|
|
|
|
/// as the offset of the target addressing mode.
|
|
|
|
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
|
|
|
|
// PPC allows a sign-extended 16-bit immediate field.
|
|
|
|
return (V > -(1 << 16) && V < (1 << 16)-1);
|
|
|
|
}
|