mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
ARM backend contribution from Apple.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33353 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bd92d81d22
commit
a8e2989ece
@ -20,43 +20,77 @@
|
||||
#include <cassert>
|
||||
|
||||
namespace llvm {
|
||||
// Enums corresponding to ARM condition codes
|
||||
namespace ARMCC {
|
||||
enum CondCodes {
|
||||
EQ,
|
||||
NE,
|
||||
CS,
|
||||
CC,
|
||||
MI,
|
||||
PL,
|
||||
VS,
|
||||
VC,
|
||||
HI,
|
||||
LS,
|
||||
GE,
|
||||
LT,
|
||||
GT,
|
||||
LE,
|
||||
AL
|
||||
};
|
||||
|
||||
class ARMTargetMachine;
|
||||
class FunctionPass;
|
||||
|
||||
// Enums corresponding to ARM condition codes
namespace ARMCC {
  /// CondCodes - The ARM condition codes.  Each condition is declared right
  /// next to its logical inverse; AL ("always") comes last and has no inverse.
  enum CondCodes {
    EQ, NE,
    HS, LO,
    MI, PL,
    VS, VC,
    HI, LS,
    GE, LT,
    GT, LE,
    AL
  };

  /// getOppositeCondition - Return the condition code that is the inverse of
  /// CC.  AL and out-of-range values trip the assert; when asserts are
  /// compiled out they yield NE.
  inline static CondCodes getOppositeCondition(CondCodes CC){
    switch (CC) {
    case EQ: return NE;
    case NE: return EQ;

    case HS: return LO;
    case LO: return HS;

    case MI: return PL;
    case PL: return MI;

    case VS: return VC;
    case VC: return VS;

    case HI: return LS;
    case LS: return HI;

    case GE: return LT;
    case LT: return GE;

    case GT: return LE;
    case LE: return GT;

    default:
      assert(0 && "Unknown condition code");
      return NE;
    }
  }
}
|
||||
|
||||
namespace ARMShift {
|
||||
enum ShiftTypes {
|
||||
LSL,
|
||||
LSR,
|
||||
ASR,
|
||||
ROR,
|
||||
RRX
|
||||
};
|
||||
/// ARMCondCodeToString - Return the lower-case two-letter mnemonic for the
/// given ARM condition code.  Unknown values trip the assert; when asserts
/// are compiled out they yield "eq".
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
  switch (CC) {
  case ARMCC::EQ: return "eq";
  case ARMCC::NE: return "ne";
  case ARMCC::HS: return "hs";
  case ARMCC::LO: return "lo";
  case ARMCC::MI: return "mi";
  case ARMCC::PL: return "pl";
  case ARMCC::VS: return "vs";
  case ARMCC::VC: return "vc";
  case ARMCC::HI: return "hi";
  case ARMCC::LS: return "ls";
  case ARMCC::GE: return "ge";
  case ARMCC::LT: return "lt";
  case ARMCC::GT: return "gt";
  case ARMCC::LE: return "le";
  case ARMCC::AL: return "al";
  default:
    assert(0 && "Unknown condition code");
    return "eq";
  }
}
|
||||
|
||||
class FunctionPass;
|
||||
class TargetMachine;
|
||||
FunctionPass *createARMISelDag(ARMTargetMachine &TM);
|
||||
FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM);
|
||||
FunctionPass *createARMLoadStoreOptimizationPass();
|
||||
FunctionPass *createARMConstantIslandPass();
|
||||
|
||||
FunctionPass *createARMISelDag(TargetMachine &TM);
|
||||
FunctionPass *createARMCodePrinterPass(std::ostream &OS, TargetMachine &TM);
|
||||
FunctionPass *createARMFixMulPass();
|
||||
} // end namespace llvm;
|
||||
|
||||
// Defines symbolic names for ARM registers. This defines a mapping from
|
||||
|
@ -17,6 +17,73 @@
|
||||
|
||||
include "../Target.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM Subtarget features.
|
||||
//
|
||||
|
||||
def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
|
||||
"ARM v4T">;
|
||||
def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
|
||||
"ARM v5T">;
|
||||
def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
|
||||
"ARM v5TE, v5TEj, v5TExp">;
|
||||
def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
|
||||
"ARM v6">;
|
||||
def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true",
|
||||
"Enable VFP2 instructions ">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM Processors supported.
|
||||
//
|
||||
|
||||
class Proc<string Name, list<SubtargetFeature> Features>
|
||||
: Processor<Name, NoItineraries, Features>;
|
||||
|
||||
// V4 Processors.
|
||||
def : Proc<"generic", []>;
|
||||
def : Proc<"arm8", []>;
|
||||
def : Proc<"arm810", []>;
|
||||
def : Proc<"strongarm", []>;
|
||||
def : Proc<"strongarm110", []>;
|
||||
def : Proc<"strongarm1100", []>;
|
||||
def : Proc<"strongarm1110", []>;
|
||||
|
||||
// V4T Processors.
|
||||
def : Proc<"arm7tdmi", [ArchV4T]>;
|
||||
def : Proc<"arm7tdmi-s", [ArchV4T]>;
|
||||
def : Proc<"arm710t", [ArchV4T]>;
|
||||
def : Proc<"arm720t", [ArchV4T]>;
|
||||
def : Proc<"arm9", [ArchV4T]>;
|
||||
def : Proc<"arm9tdmi", [ArchV4T]>;
|
||||
def : Proc<"arm920", [ArchV4T]>;
|
||||
def : Proc<"arm920t", [ArchV4T]>;
|
||||
def : Proc<"arm922t", [ArchV4T]>;
|
||||
def : Proc<"arm940t", [ArchV4T]>;
|
||||
def : Proc<"ep9312", [ArchV4T]>;
|
||||
|
||||
// V5T Processors.
|
||||
def : Proc<"arm10tdmi", [ArchV5T]>;
|
||||
def : Proc<"arm1020t", [ArchV5T]>;
|
||||
|
||||
// V5TE Processors.
|
||||
def : Proc<"arm9e", [ArchV5TE]>;
|
||||
def : Proc<"arm946e-s", [ArchV5TE]>;
|
||||
def : Proc<"arm966e-s", [ArchV5TE]>;
|
||||
def : Proc<"arm968e-s", [ArchV5TE]>;
|
||||
def : Proc<"arm10e", [ArchV5TE]>;
|
||||
def : Proc<"arm1020e", [ArchV5TE]>;
|
||||
def : Proc<"arm1022e", [ArchV5TE]>;
|
||||
def : Proc<"xscale", [ArchV5TE]>;
|
||||
def : Proc<"iwmmxt", [ArchV5TE]>;
|
||||
|
||||
// V6 Processors.
|
||||
def : Proc<"arm1136j-s", [ArchV6]>;
|
||||
def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
|
||||
def : Proc<"arm1176jz-s", [ArchV6]>;
|
||||
def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
|
||||
def : Proc<"mpcorenovfp", [ArchV6]>;
|
||||
def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -31,8 +98,14 @@ include "ARMInstrInfo.td"
|
||||
|
||||
def ARMInstrInfo : InstrInfo {
|
||||
// Define how we want to layout our target-specific information field.
|
||||
let TSFlagsFields = [];
|
||||
let TSFlagsShifts = [];
|
||||
let TSFlagsFields = ["AddrModeBits",
|
||||
"SizeFlag",
|
||||
"IndexModeBits",
|
||||
"Opcode"];
|
||||
let TSFlagsShifts = [0,
|
||||
4,
|
||||
7,
|
||||
9];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
394
lib/Target/ARM/ARMAddressingModes.h
Normal file
394
lib/Target/ARM/ARMAddressingModes.h
Normal file
@ -0,0 +1,394 @@
|
||||
//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Chris Lattner and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM addressing mode implementation stuff.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
|
||||
#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
|
||||
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ARM_AM - ARM Addressing Mode Stuff
|
||||
namespace ARM_AM {
|
||||
/// ShiftOpc - Shift operations that can be attached to an operand.  The
/// numeric values are packed into encoded operand fields by the helpers in
/// this file, so they are spelled out explicitly.
enum ShiftOpc {
  no_shift = 0,
  asr      = 1,
  lsl      = 2,
  lsr      = 3,
  ror      = 4,
  rrx      = 5
};
|
||||
|
||||
/// AddrOpc - Whether an addressing-mode offset is added to or subtracted
/// from the base.  The enumerators take the value of the matching ASCII sign
/// character.
enum AddrOpc {
  add = '+',
  sub = '-'
};
|
||||
|
||||
/// getShiftOpcStr - Return the assembly mnemonic for a shift opcode.
/// no_shift (and any unknown value) trips the assert; when asserts are
/// compiled out those values yield "asr".
static inline const char *getShiftOpcStr(ShiftOpc Op) {
  switch (Op) {
  case ARM_AM::asr: return "asr";
  case ARM_AM::lsl: return "lsl";
  case ARM_AM::lsr: return "lsr";
  case ARM_AM::ror: return "ror";
  case ARM_AM::rrx: return "rrx";
  default:
    assert(0 && "Unknown shift opc!");
    return "asr";
  }
}
|
||||
|
||||
/// getShiftOpcForNode - Map the opcode of a DAG node onto the ARM shift
/// operation it corresponds to, or no_shift if the node is not a foldable
/// shift.
static inline ShiftOpc getShiftOpcForNode(SDOperand N) {
  switch (N.getOpcode()) {
  case ISD::SRA:  return ARM_AM::asr;
  case ISD::SHL:  return ARM_AM::lsl;
  case ISD::SRL:  return ARM_AM::lsr;
  case ISD::ROTR: return ARM_AM::ror;

  // Not handled (yet):
  //   ISD::ROTL   - only usable with an immediate amount, by turning it into
  //                 a ROTR.
  //   ARMISD::RRX - can't be handled here, because it would require folding
  //                 a flag into the addressing mode. :(  This causes us to
  //                 miss certain things.
  default:
    return ARM_AM::no_shift;
  }
}
|
||||
|
||||
/// AMSubMode - Sub-modes for load/store-multiple style addressing.  Zero is
/// reserved as the "bad" value so that a cleared field is never mistaken
/// for a real sub-mode.
enum AMSubMode {
  bad_am_submode = 0,
  ia             = 1,
  ib             = 2,
  da             = 3,
  db             = 4
};
|
||||
|
||||
/// getAMSubModeStr - Return the two-letter suffix for an addressing
/// sub-mode.  bad_am_submode (and any unknown value) trips the assert; when
/// asserts are compiled out those values yield "ia".
static inline const char *getAMSubModeStr(AMSubMode Mode) {
  switch (Mode) {
  case ARM_AM::ia: return "ia";
  case ARM_AM::ib: return "ib";
  case ARM_AM::da: return "da";
  case ARM_AM::db: return "db";
  default:
    assert(0 && "Unknown addressing sub-mode!");
    return "ia";
  }
}
|
||||
|
||||
/// getAMSubModeAltStr - Return the alternative (stack-style) two-letter
/// suffix for an addressing sub-mode.  The suffix depends on whether the
/// instruction is a load (isLD) or a store.  Unknown sub-modes trip the
/// assert; when asserts are compiled out they are treated like ia.
static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
  switch (Mode) {
  case ARM_AM::ib:
    if (isLD) return "ed";
    return "fa";
  case ARM_AM::da:
    if (isLD) return "fa";
    return "ed";
  case ARM_AM::db:
    if (isLD) return "ea";
    return "fd";
  default:
    assert(0 && "Unknown addressing sub-mode!");
    // Falls into the ia handling when asserts are disabled.
  case ARM_AM::ia:
    if (isLD) return "fd";
    return "ea";
  }
}
|
||||
|
||||
/// rotr32 - Rotate the 32-bit value Val right by Amt bits.  Amt must be in
/// the range [0, 31].
static inline unsigned rotr32(unsigned Val, unsigned Amt) {
  assert(Amt < 32 && "Invalid rotate amount");
  // Mask the complementary shift so that Amt == 0 shifts by 0 rather than 32.
  const unsigned WrapShift = (32 - Amt) & 31;
  const unsigned LowPart  = Val >> Amt;
  const unsigned HighPart = Val << WrapShift;
  return LowPart | HighPart;
}
|
||||
|
||||
/// rotl32 - Rotate the 32-bit value Val left by Amt bits.  Amt must be in
/// the range [0, 31].
static inline unsigned rotl32(unsigned Val, unsigned Amt) {
  assert(Amt < 32 && "Invalid rotate amount");
  // Mask the complementary shift so that Amt == 0 shifts by 0 rather than 32.
  const unsigned WrapShift = (32 - Amt) & 31;
  const unsigned HighPart = Val << Amt;
  const unsigned LowPart  = Val >> WrapShift;
  return HighPart | LowPart;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #1: shift_operand with registers
|
||||
//===--------------------------------------------------------------------===//
|
||||
//
|
||||
// This 'addressing mode' is used for arithmetic instructions. It can
|
||||
// represent things like:
|
||||
// reg
|
||||
// reg [asr|lsl|lsr|ror|rrx] reg
|
||||
// reg [asr|lsl|lsr|ror|rrx] imm
|
||||
//
|
||||
// This is stored as three operands [rega, regb, opc]. The first is the base
|
||||
// reg, the second is the shift amount (or reg0 if not present or imm). The
|
||||
// third operand encodes the shift opcode and the imm if a reg isn't present.
|
||||
//
|
||||
/// getSORegOpc - Pack a shift opcode and an immediate into the third
/// shifter-operand field: the opcode goes in the low three bits and the
/// immediate in the bits above them.
static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
  const unsigned ImmField = Imm << 3;
  return ImmField | ShOp;
}
|
||||
/// getSORegOffset - Extract the immediate from a packed shifter-operand
/// field by discarding the three low (shift opcode) bits.
static inline unsigned getSORegOffset(unsigned Op) {
  const unsigned ShiftOpcBits = 3;
  return Op >> ShiftOpcBits;
}
|
||||
/// getSORegShOp - Extract the shift opcode (the low three bits) from a
/// packed shifter-operand field.
static inline ShiftOpc getSORegShOp(unsigned Op) {
  const unsigned ShiftOpcMask = 7;
  const unsigned RawOpc = Op & ShiftOpcMask;
  return (ShiftOpc)RawOpc;
}
|
||||
|
||||
/// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
/// the 8-bit imm value held in its low byte.
static inline unsigned getSOImmValImm(unsigned Imm) {
  const unsigned Imm8Mask = 0xFF;
  return Imm & Imm8Mask;
}
|
||||
/// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
/// the rotate amount: the value stored above the low 8 bits, doubled.
static inline unsigned getSOImmValRot(unsigned Imm) {
  const unsigned RotField = Imm >> 8;
  return RotField * 2;
}
|
||||
|
||||
/// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
|
||||
/// computing the rotate amount to use. If this immediate value cannot be
|
||||
/// handled with a single shifter-op, determine a good rotate amount that will
|
||||
/// take a maximal chunk of bits out of the immediate.
|
||||
static inline unsigned getSOImmValRotate(unsigned Imm) {
|
||||
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
|
||||
// of zero.
|
||||
if ((Imm & ~255U) == 0) return 0;
|
||||
|
||||
// Use CTZ to compute the rotate amount.
|
||||
unsigned TZ = CountTrailingZeros_32(Imm);
|
||||
|
||||
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
|
||||
// not 9.
|
||||
unsigned RotAmt = TZ & ~1;
|
||||
|
||||
// If we can handle this spread, return it.
|
||||
if ((rotr32(Imm, RotAmt) & ~255U) == 0)
|
||||
return (32-RotAmt)&31; // HW rotates right, not left.
|
||||
|
||||
// For values like 0xF000000F, we should skip the first run of ones, then
|
||||
// retry the hunt.
|
||||
if (Imm & 1) {
|
||||
unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
|
||||
if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
|
||||
// Restart the search for a high-order bit after the initial seconds of
|
||||
// ones.
|
||||
unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
|
||||
|
||||
// Rotate amount must be even.
|
||||
unsigned RotAmt2 = TZ2 & ~1;
|
||||
|
||||
// If this fits, use it.
|
||||
if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
|
||||
return (32-RotAmt2)&31; // HW rotates right, not left.
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, we have no way to cover this span of bits with a single
|
||||
// shifter_op immediate. Return a chunk of bits that will be useful to
|
||||
// handle.
|
||||
return (32-RotAmt)&31; // HW rotates right, not left.
|
||||
}
|
||||
|
||||
/// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
|
||||
/// into an shifter_operand immediate operand, return the 12-bit encoding for
|
||||
/// it. If not, return -1.
|
||||
static inline int getSOImmVal(unsigned Arg) {
|
||||
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
|
||||
// of zero.
|
||||
if ((Arg & ~255U) == 0) return Arg;
|
||||
|
||||
unsigned RotAmt = getSOImmValRotate(Arg);
|
||||
|
||||
// If this cannot be handled with a single shifter_op, bail out.
|
||||
if (rotr32(~255U, RotAmt) & Arg)
|
||||
return -1;
|
||||
|
||||
// Encode this correctly.
|
||||
return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
|
||||
}
|
||||
|
||||
/// isSOImmTwoPartVal - Return true if the specified value can be obtained by
|
||||
/// or'ing together two SOImmVal's.
|
||||
static inline bool isSOImmTwoPartVal(unsigned V) {
|
||||
// If this can be handled with a single shifter_op, bail out.
|
||||
V = rotr32(~255U, getSOImmValRotate(V)) & V;
|
||||
if (V == 0)
|
||||
return false;
|
||||
|
||||
// If this can be handled with two shifter_op's, accept.
|
||||
V = rotr32(~255U, getSOImmValRotate(V)) & V;
|
||||
return V == 0;
|
||||
}
|
||||
|
||||
/// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
|
||||
/// return the first chunk of it.
|
||||
static inline unsigned getSOImmTwoPartFirst(unsigned V) {
|
||||
return rotr32(255U, getSOImmValRotate(V)) & V;
|
||||
}
|
||||
|
||||
/// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
|
||||
/// return the second chunk of it.
|
||||
static inline unsigned getSOImmTwoPartSecond(unsigned V) {
|
||||
// Mask out the first hunk.
|
||||
V = rotr32(~255U, getSOImmValRotate(V)) & V;
|
||||
|
||||
// Take what's left.
|
||||
assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
|
||||
return V;
|
||||
}
|
||||
|
||||
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
|
||||
/// by a left shift. Returns the shift amount to use.
|
||||
static inline unsigned getThumbImmValShift(unsigned Imm) {
|
||||
// 8-bit (or less) immediates are trivially immediate operand with a shift
|
||||
// of zero.
|
||||
if ((Imm & ~255U) == 0) return 0;
|
||||
|
||||
// Use CTZ to compute the shift amount.
|
||||
return CountTrailingZeros_32(Imm);
|
||||
}
|
||||
|
||||
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
|
||||
/// by left shifting a 8-bit immediate.
|
||||
static inline bool isThumbImmShiftedVal(unsigned V) {
|
||||
// If this can be handled with
|
||||
V = (~255U << getThumbImmValShift(V)) & V;
|
||||
return V == 0;
|
||||
}
|
||||
|
||||
/// getThumbImmNonShiftedVal - If V is a value that satisfies
|
||||
/// isThumbImmShiftedVal, return the non-shiftd value.
|
||||
static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
|
||||
return V >> getThumbImmValShift(V);
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #2
|
||||
//===--------------------------------------------------------------------===//
|
||||
//
|
||||
// This is used for most simple load/store instructions.
|
||||
//
|
||||
// addrmode2 := reg +/- reg shop imm
|
||||
// addrmode2 := reg +/- imm12
|
||||
//
|
||||
// The first operand is always a Reg. The second operand is a reg if in
|
||||
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
|
||||
// in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
|
||||
//
|
||||
// If this addressing mode is a frame index (before prolog/epilog insertion
|
||||
// and code rewriting), this operand will have the form: FI#, reg0, <offs>
|
||||
// with no shift amount for the frame offset.
|
||||
//
|
||||
/// getAM2Opc - Encode the addrmode2 opc field: the 12-bit immediate in bits
/// 0-11, the add/sub flag in bit 12 (set for sub) and the shift opcode from
/// bit 13 upwards.
static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
  assert(Imm12 < (1 << 12) && "Imm too large!");
  unsigned Encoded = Imm12;
  if (Opc == sub)
    Encoded |= 1U << 12;
  Encoded |= (unsigned)SO << 13;
  return Encoded;
}
|
||||
/// getAM2Offset - Extract the 12-bit immediate (bits 0-11) from an encoded
/// addrmode2 opc field.
static inline unsigned getAM2Offset(unsigned AM2Opc) {
  const unsigned Imm12Mask = (1 << 12) - 1;
  return AM2Opc & Imm12Mask;
}
|
||||
/// getAM2Op - Extract the add/sub operation (bit 12) from an encoded
/// addrmode2 opc field.
static inline AddrOpc getAM2Op(unsigned AM2Opc) {
  const bool IsSub = ((AM2Opc >> 12) & 1) != 0;
  if (IsSub)
    return sub;
  return add;
}
|
||||
/// getAM2ShiftOpc - Extract the shift opcode (bit 13 and up) from an encoded
/// addrmode2 opc field.
static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
  const unsigned RawOpc = AM2Opc >> 13;
  return (ShiftOpc)RawOpc;
}
|
||||
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #3
|
||||
//===--------------------------------------------------------------------===//
|
||||
//
|
||||
// This is used for sign-extending loads, and load/store-pair instructions.
|
||||
//
|
||||
// addrmode3 := reg +/- reg
|
||||
// addrmode3 := reg +/- imm8
|
||||
//
|
||||
// The first operand is always a Reg. The second operand is a reg if in
|
||||
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
|
||||
// in bit 8, the immediate in bits 0-7.
|
||||
|
||||
/// getAM3Opc - This function encodes the addrmode3 opc field.
|
||||
static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
|
||||
bool isSub = Opc == sub;
|
||||
return ((int)isSub << 8) | Offset;
|
||||
}
|
||||
/// getAM3Offset - Extract the 8-bit offset (bits 0-7) from an encoded
/// addrmode3 opc field.
static inline unsigned char getAM3Offset(unsigned AM3Opc) {
  const unsigned Imm8Mask = 0xFF;
  return AM3Opc & Imm8Mask;
}
|
||||
/// getAM3Op - Extract the add/sub operation (bit 8) from an encoded
/// addrmode3 opc field.
static inline AddrOpc getAM3Op(unsigned AM3Opc) {
  const bool IsSub = ((AM3Opc >> 8) & 1) != 0;
  if (IsSub)
    return sub;
  return add;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #4
|
||||
//===--------------------------------------------------------------------===//
|
||||
//
|
||||
// This is used for load / store multiple instructions.
|
||||
//
|
||||
// addrmode4 := reg, <mode>
|
||||
//
|
||||
// The four modes are:
|
||||
// IA - Increment after
|
||||
// IB - Increment before
|
||||
// DA - Decrement after
|
||||
// DB - Decrement before
|
||||
//
|
||||
// If the 4th bit (writeback) is set, then the base register is updated after
|
||||
// the memory transfer.
|
||||
|
||||
/// getAM4SubMode - Extract the sub-mode (the low three bits) from an encoded
/// addrmode4 mode immediate.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
  const unsigned SubModeMask = 0x7;
  const unsigned RawMode = Mode & SubModeMask;
  return (AMSubMode)RawMode;
}
|
||||
|
||||
/// getAM4ModeImm - Encode an addrmode4 mode immediate: the sub-mode in the
/// low bits and the writeback flag in bit 3.
static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
  const int WritebackBit = WB ? (1 << 3) : 0;
  return (int)SubMode | WritebackBit;
}
|
||||
|
||||
/// getAM4WBFlag - Return true if the writeback flag (bit 3) is set in an
/// encoded addrmode4 mode immediate.
static inline bool getAM4WBFlag(unsigned Mode) {
  const unsigned WritebackBit = 1U << 3;
  return (Mode & WritebackBit) != 0;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #5
|
||||
//===--------------------------------------------------------------------===//
|
||||
//
|
||||
// This is used for coprocessor instructions, such as FP load/stores.
|
||||
//
|
||||
// addrmode5 := reg +/- imm8*4
|
||||
//
|
||||
// The first operand is always a Reg. The third field encodes the operation
|
||||
// in bit 8, the immediate in bits 0-7.
|
||||
//
|
||||
// This can also be used for FP load/store multiple ops. The third field encodes
|
||||
// writeback mode in bit 8, the number of registers (or 2 times the number of
|
||||
// registers for DPR ops) in bits 0-7. In addition, bit 9-11 encodes one of the
|
||||
// following two sub-modes:
|
||||
//
|
||||
// IA - Increment after
|
||||
// DB - Decrement before
|
||||
|
||||
/// getAM5Opc - This function encodes the addrmode5 opc field.
|
||||
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
|
||||
bool isSub = Opc == sub;
|
||||
return ((int)isSub << 8) | Offset;
|
||||
}
|
||||
/// getAM5Offset - Extract the 8-bit offset (bits 0-7) from an encoded
/// addrmode5 opc field.
static inline unsigned char getAM5Offset(unsigned AM5Opc) {
  const unsigned Imm8Mask = 0xFF;
  return AM5Opc & Imm8Mask;
}
|
||||
/// getAM5Op - Extract the add/sub operation (bit 8) from an encoded
/// addrmode5 opc field.
static inline AddrOpc getAM5Op(unsigned AM5Opc) {
  const bool IsSub = ((AM5Opc >> 8) & 1) != 0;
  if (IsSub)
    return sub;
  return add;
}
|
||||
|
||||
/// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
|
||||
/// FSTM instructions.
|
||||
static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
|
||||
unsigned char Offset) {
|
||||
assert((SubMode == ia || SubMode == db) &&
|
||||
"Illegal addressing mode 5 sub-mode!");
|
||||
return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
|
||||
}
|
||||
/// getAM5SubMode - Extract the sub-mode (bits 9-11) from an encoded
/// addrmode5 opc field.
static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
  const unsigned RawMode = (AM5Opc >> 9) & 0x7;
  return (AMSubMode)RawMode;
}
|
||||
/// getAM5WBFlag - Return true if the writeback flag (bit 8) is set in an
/// encoded addrmode5 opc field.
static inline bool getAM5WBFlag(unsigned AM5Opc) {
  const unsigned WritebackBit = 1U << 8;
  return (AM5Opc & WritebackBit) != 0;
}
|
||||
|
||||
} // end namespace ARM_AM
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,84 +0,0 @@
|
||||
//===-- ARMCommon.cpp - Define support functions for ARM --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by the "Instituto Nokia de Tecnologia" and
|
||||
// is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "ARMCommon.h"
|
||||
|
||||
// Rotates the 32-bit value 'x' left by 'n' bits.  'n' is reduced modulo 32,
// and the complementary shift is masked as well, so that a rotate by zero
// never turns into a shift by 32 (which is undefined behaviour).
static inline unsigned rotateL(unsigned x, unsigned n){
  n &= 31;
  return ((x << n) | (x >> ((32 - n) & 31)));
}
|
||||
|
||||
// Rotates the 32-bit value 'x' right by 'n' bits.  'n' is reduced modulo 32,
// and the complementary shift is masked as well, so that a rotate by zero
// never turns into a shift by 32 (which is undefined behaviour).
static inline unsigned rotateR(unsigned x, unsigned n){
  n &= 31;
  return ((x >> n) | (x << ((32 - n) & 31)));
}
|
||||
|
||||
// finds the end position of largest sequence of zeros in binary representation
|
||||
// of 'immediate'.
|
||||
static int findLargestZeroSequence(unsigned immediate){
|
||||
int max_zero_pos = 0;
|
||||
int max_zero_length = 0;
|
||||
int zero_pos;
|
||||
int zero_length;
|
||||
int pos = 0;
|
||||
int end_pos;
|
||||
|
||||
while ((immediate & 0x3) == 0) {
|
||||
immediate = rotateR(immediate, 2);
|
||||
pos+=2;
|
||||
}
|
||||
end_pos = pos+32;
|
||||
|
||||
while (pos<end_pos){
|
||||
while (((immediate & 0x3) != 0)&&(pos<end_pos)) {
|
||||
immediate = rotateR(immediate, 2);
|
||||
pos+=2;
|
||||
}
|
||||
zero_pos = pos;
|
||||
while (((immediate & 0x3) == 0)&&(pos<end_pos)) {
|
||||
immediate = rotateR(immediate, 2);
|
||||
pos+=2;
|
||||
}
|
||||
zero_length = pos - zero_pos;
|
||||
if (zero_length > max_zero_length){
|
||||
max_zero_length = zero_length;
|
||||
max_zero_pos = zero_pos % 32;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return (max_zero_pos + max_zero_length) % 32;
|
||||
}
|
||||
|
||||
std::vector<unsigned> splitImmediate(unsigned immediate){
|
||||
std::vector<unsigned> immediatePieces;
|
||||
|
||||
if (immediate == 0){
|
||||
immediatePieces.push_back(0);
|
||||
} else {
|
||||
int start_pos = findLargestZeroSequence(immediate);
|
||||
unsigned immediate_tmp = rotateR(immediate, start_pos);
|
||||
int pos = 0;
|
||||
while (pos < 32){
|
||||
while(((immediate_tmp&0x3) == 0)&&(pos<32)){
|
||||
immediate_tmp = rotateR(immediate_tmp,2);
|
||||
pos+=2;
|
||||
}
|
||||
if (pos < 32){
|
||||
immediatePieces.push_back(rotateL(immediate_tmp&0xFF,
|
||||
(start_pos + pos) % 32 ));
|
||||
immediate_tmp = rotateR(immediate_tmp,8);
|
||||
pos+=8;
|
||||
}
|
||||
}
|
||||
}
|
||||
return immediatePieces;
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
//===-- ARMCommon.h - Define support functions for ARM ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by the "Instituto Nokia de Tecnologia" and
|
||||
// is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM_COMMON_H
|
||||
#define ARM_COMMON_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
std::vector<unsigned> splitImmediate(unsigned immediate);
|
||||
|
||||
#endif
|
490
lib/Target/ARM/ARMConstantIslandPass.cpp
Normal file
490
lib/Target/ARM/ARMConstantIslandPass.cpp
Normal file
@ -0,0 +1,490 @@
|
||||
//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Chris Lattner and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that splits the constant pool up into 'islands'
|
||||
// which are scattered through-out the function. This is required due to the
|
||||
// limited pc-relative displacements that ARM has.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm-cp-islands"
|
||||
#include "ARM.h"
|
||||
#include "ARMInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
||||
#include "llvm/Target/TargetAsmInfo.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include <iostream>
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(NumSplit, "Number of uncond branches inserted");
|
||||
|
||||
namespace {
|
||||
/// ARMConstantIslands - Due to limited pc-relative displacements, ARM
|
||||
/// requires constant pool entries to be scattered among the instructions
|
||||
/// inside a function. To do this, it completely ignores the normal LLVM
|
||||
/// constant pool, instead, it places constants where-ever it feels like with
|
||||
/// special instructions.
|
||||
///
|
||||
/// The terminology used in this pass includes:
|
||||
/// Islands - Clumps of constants placed in the function.
|
||||
/// Water - Potential places where an island could be formed.
|
||||
/// CPE - A constant pool entry that has been placed somewhere, which
|
||||
/// tracks a list of users.
|
||||
class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
|
||||
/// NextUID - Assign unique ID's to CPE's.
|
||||
unsigned NextUID;
|
||||
|
||||
/// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
|
||||
/// by MBB Number.
|
||||
std::vector<unsigned> BBSizes;
|
||||
|
||||
/// WaterList - A sorted list of basic blocks where islands could be placed
|
||||
/// (i.e. blocks that don't fall through to the following block, due
|
||||
/// to a return, unreachable, or unconditional branch).
|
||||
std::vector<MachineBasicBlock*> WaterList;
|
||||
|
||||
/// CPUser - One user of a constant pool, keeping the machine instruction
|
||||
/// pointer, the constant pool being referenced, and the max displacement
|
||||
/// allowed from the instruction to the CP.
|
||||
struct CPUser {
|
||||
MachineInstr *MI;
|
||||
MachineInstr *CPEMI;
|
||||
unsigned MaxDisp;
|
||||
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
|
||||
: MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
|
||||
};
|
||||
|
||||
/// CPUsers - Keep track of all of the machine instructions that use various
|
||||
/// constant pools and their max displacement.
|
||||
std::vector<CPUser> CPUsers;
|
||||
|
||||
const TargetInstrInfo *TII;
|
||||
const TargetAsmInfo *TAI;
|
||||
public:
|
||||
virtual bool runOnMachineFunction(MachineFunction &Fn);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM constant island placement pass";
|
||||
}
|
||||
|
||||
private:
|
||||
void DoInitialPlacement(MachineFunction &Fn,
|
||||
std::vector<MachineInstr*> &CPEMIs);
|
||||
void InitialFunctionScan(MachineFunction &Fn,
|
||||
const std::vector<MachineInstr*> &CPEMIs);
|
||||
void SplitBlockBeforeInstr(MachineInstr *MI);
|
||||
bool HandleConstantPoolUser(MachineFunction &Fn, CPUser &U);
|
||||
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
|
||||
|
||||
unsigned GetInstSize(MachineInstr *MI) const;
|
||||
unsigned GetOffsetOf(MachineInstr *MI) const;
|
||||
};
|
||||
}
|
||||
|
||||
/// createARMConstantIslandPass - returns an instance of the constant island
|
||||
/// placement pass.
|
||||
FunctionPass *llvm::createARMConstantIslandPass() {
|
||||
return new ARMConstantIslands();
|
||||
}
|
||||
|
||||
/// runOnMachineFunction - Place the constant pool entries of Fn.  Returns
/// false without touching the function when its constant pool is empty, and
/// true otherwise.
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
  // Nothing to do for a function without constants.
  MachineConstantPool &MCP = *Fn.getConstantPool();
  if (MCP.isEmpty())
    return false;

  TII = Fn.getTarget().getInstrInfo();
  TAI = Fn.getTarget().getTargetAsmInfo();

  // Renumber the machine basic blocks so that block numbers agree with the
  // position of each block in the function.
  Fn.RenumberBlocks();

  // Initial placement: all constant pool entries go at the end of the
  // function.
  std::vector<MachineInstr*> CPEMIs;
  DoInitialPlacement(Fn, CPEMIs);

  // The first unused UID is the next one to hand out.
  NextUID = CPEMIs.size();

  // Initial scan: record the size of each block, the location of all the
  // water, and every constant pool user.
  InitialFunctionScan(Fn, CPEMIs);
  CPEMIs.clear();

  // Keep placing constant pool entries until a full sweep over the users
  // changes nothing.
  for (bool Changed = true; Changed; ) {
    Changed = false;
    for (unsigned Idx = 0, NumUsers = CPUsers.size(); Idx != NumUsers; ++Idx)
      if (HandleConstantPoolUser(Fn, CPUsers[Idx]))
        Changed = true;
  }

  // Release the per-function state.
  BBSizes.clear();
  WaterList.clear();
  CPUsers.clear();

  return true;
}
|
||||
|
||||
/// DoInitialPlacement - Perform the initial placement of the constant pool
|
||||
/// entries. To start with, we put them all at the end of the function.
|
||||
void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
|
||||
std::vector<MachineInstr*> &CPEMIs){
|
||||
// Create the basic block to hold the CPE's.
|
||||
MachineBasicBlock *BB = new MachineBasicBlock();
|
||||
Fn.getBasicBlockList().push_back(BB);
|
||||
|
||||
// Add all of the constants from the constant pool to the end block, use an
|
||||
// identity mapping of CPI's to CPE's.
|
||||
const std::vector<MachineConstantPoolEntry> &CPs =
|
||||
Fn.getConstantPool()->getConstants();
|
||||
|
||||
const TargetData &TD = *Fn.getTarget().getTargetData();
|
||||
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
|
||||
unsigned Size = TD.getTypeSize(CPs[i].getType());
|
||||
// Verify that all constant pool entries are a multiple of 4 bytes. If not,
|
||||
// we would have to pad them out or something so that instructions stay
|
||||
// aligned.
|
||||
assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
|
||||
MachineInstr *CPEMI =
|
||||
BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
|
||||
.addImm(i).addConstantPoolIndex(i).addImm(Size);
|
||||
CPEMIs.push_back(CPEMI);
|
||||
DEBUG(std::cerr << "Moved CPI#" << i << " to end of function as #"
|
||||
<< i << "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/// BBHasFallthrough - Return true of the specified basic block can fallthrough
|
||||
/// into the block immediately after it.
|
||||
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
|
||||
// Get the next machine basic block in the function.
|
||||
MachineFunction::iterator MBBI = MBB;
|
||||
if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
|
||||
return false;
|
||||
|
||||
MachineBasicBlock *NextBB = next(MBBI);
|
||||
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
|
||||
E = MBB->succ_end(); I != E; ++I)
|
||||
if (*I == NextBB)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// InitialFunctionScan - Do the initial scan of the function, building up
|
||||
/// information about the sizes of each block, the location of all the water,
|
||||
/// and finding all of the constant pool users.
|
||||
void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
|
||||
const std::vector<MachineInstr*> &CPEMIs) {
|
||||
for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
|
||||
MBBI != E; ++MBBI) {
|
||||
MachineBasicBlock &MBB = *MBBI;
|
||||
|
||||
// If this block doesn't fall through into the next MBB, then this is
|
||||
// 'water' that a constant pool island could be placed.
|
||||
if (!BBHasFallthrough(&MBB))
|
||||
WaterList.push_back(&MBB);
|
||||
|
||||
unsigned MBBSize = 0;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
// Add instruction size to MBBSize.
|
||||
MBBSize += GetInstSize(I);
|
||||
|
||||
// Scan the instructions for constant pool operands.
|
||||
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
|
||||
if (I->getOperand(op).isConstantPoolIndex()) {
|
||||
// We found one. The addressing mode tells us the max displacement
|
||||
// from the PC that this instruction permits.
|
||||
unsigned MaxOffs = 0;
|
||||
|
||||
// Basic size info comes from the TSFlags field.
|
||||
unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
|
||||
switch (TSFlags & ARMII::AddrModeMask) {
|
||||
default:
|
||||
// Constant pool entries can reach anything.
|
||||
if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
|
||||
continue;
|
||||
assert(0 && "Unknown addressing mode for CP reference!");
|
||||
case ARMII::AddrMode1: // AM1: 8 bits << 2
|
||||
MaxOffs = 1 << (8+2); // Taking the address of a CP entry.
|
||||
break;
|
||||
case ARMII::AddrMode2:
|
||||
MaxOffs = 1 << 12; // +-offset_12
|
||||
break;
|
||||
case ARMII::AddrMode3:
|
||||
MaxOffs = 1 << 8; // +-offset_8
|
||||
break;
|
||||
// addrmode4 has no immediate offset.
|
||||
case ARMII::AddrMode5:
|
||||
MaxOffs = 1 << (8+2); // +-(offset_8*4)
|
||||
break;
|
||||
case ARMII::AddrModeT1:
|
||||
MaxOffs = 1 << 5;
|
||||
break;
|
||||
case ARMII::AddrModeT2:
|
||||
MaxOffs = 1 << (5+1);
|
||||
break;
|
||||
case ARMII::AddrModeT4:
|
||||
MaxOffs = 1 << (5+2);
|
||||
break;
|
||||
}
|
||||
|
||||
// Remember that this is a user of a CP entry.
|
||||
MachineInstr *CPEMI =CPEMIs[I->getOperand(op).getConstantPoolIndex()];
|
||||
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
|
||||
|
||||
// Instructions can only use one CP entry, don't bother scanning the
|
||||
// rest of the operands.
|
||||
break;
|
||||
}
|
||||
}
|
||||
BBSizes.push_back(MBBSize);
|
||||
}
|
||||
}
|
||||
|
||||
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
|
||||
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
|
||||
unsigned JTI) DISABLE_INLINE;
|
||||
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
|
||||
unsigned JTI) {
|
||||
return JT[JTI].MBBs.size();
|
||||
}
|
||||
|
||||
/// GetInstSize - Return the size of the specified MachineInstr.
|
||||
///
|
||||
unsigned ARMConstantIslands::GetInstSize(MachineInstr *MI) const {
|
||||
// Basic size info comes from the TSFlags field.
|
||||
unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
|
||||
|
||||
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
|
||||
default:
|
||||
// If this machine instr is an inline asm, measure it.
|
||||
if (MI->getOpcode() == ARM::INLINEASM)
|
||||
return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
|
||||
assert(0 && "Unknown or unset size field for instr!");
|
||||
break;
|
||||
case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
|
||||
case ARMII::Size4Bytes: return 4; // Arm instruction.
|
||||
case ARMII::Size2Bytes: return 2; // Thumb instruction.
|
||||
case ARMII::SizeSpecial: {
|
||||
switch (MI->getOpcode()) {
|
||||
case ARM::CONSTPOOL_ENTRY:
|
||||
// If this machine instr is a constant pool entry, its size is recorded as
|
||||
// operand #2.
|
||||
return MI->getOperand(2).getImm();
|
||||
case ARM::BR_JTr:
|
||||
case ARM::BR_JTm:
|
||||
case ARM::BR_JTadd: {
|
||||
// These are jumptable branches, i.e. a branch followed by an inlined
|
||||
// jumptable. The size is 4 + 4 * number of entries.
|
||||
unsigned JTI = MI->getOperand(MI->getNumOperands()-2).getJumpTableIndex();
|
||||
const MachineFunction *MF = MI->getParent()->getParent();
|
||||
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
|
||||
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
|
||||
assert(JTI < JT.size());
|
||||
return getNumJTEntries(JT, JTI) * 4 + 4;
|
||||
}
|
||||
default:
|
||||
// Otherwise, pseudo-instruction sizes are zero.
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GetOffsetOf - Return the current offset of the specified machine instruction
|
||||
/// from the start of the function. This offset changes as stuff is moved
|
||||
/// around inside the function.
|
||||
unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
|
||||
// The offset is composed of two things: the sum of the sizes of all MBB's
|
||||
// before this instruction's block, and the offset from the start of the block
|
||||
// it is in.
|
||||
unsigned Offset = 0;
|
||||
|
||||
// Sum block sizes before MBB.
|
||||
for (unsigned BB = 0, e = MBB->getNumber(); BB != e; ++BB)
|
||||
Offset += BBSizes[BB];
|
||||
|
||||
// Sum instructions before MI in MBB.
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
|
||||
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
|
||||
if (&*I == MI) return Offset;
|
||||
Offset += GetInstSize(I);
|
||||
}
|
||||
}
|
||||
|
||||
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
|
||||
/// ID.
|
||||
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
|
||||
const MachineBasicBlock *RHS) {
|
||||
return LHS->getNumber() < RHS->getNumber();
|
||||
}
|
||||
|
||||
/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
|
||||
/// machine function, it upsets all of the block numbers. Renumber the blocks
|
||||
/// and update the arrays that parallel this numbering.
|
||||
void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
|
||||
// Renumber the MBB's to keep them consequtive.
|
||||
NewBB->getParent()->RenumberBlocks(NewBB);
|
||||
|
||||
// Insert a size into BBSizes to align it properly with the (newly
|
||||
// renumbered) block numbers.
|
||||
BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
|
||||
|
||||
// Next, update WaterList. Specifically, we need to add NewMBB as having
|
||||
// available water after it.
|
||||
std::vector<MachineBasicBlock*>::iterator IP =
|
||||
std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
|
||||
CompareMBBNumbers);
|
||||
WaterList.insert(IP, NewBB);
|
||||
}
|
||||
|
||||
|
||||
/// Split the basic block containing MI into two blocks, which are joined by
|
||||
/// an unconditional branch. Update datastructures and renumber blocks to
|
||||
/// account for this change.
|
||||
void ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
|
||||
MachineBasicBlock *OrigBB = MI->getParent();
|
||||
|
||||
// Create a new MBB for the code after the OrigBB.
|
||||
MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
|
||||
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
|
||||
OrigBB->getParent()->getBasicBlockList().insert(MBBI, NewBB);
|
||||
|
||||
// Splice the instructions starting with MI over to NewBB.
|
||||
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
|
||||
|
||||
// Add an unconditional branch from OrigBB to NewBB.
|
||||
BuildMI(OrigBB, TII->get(ARM::B)).addMBB(NewBB);
|
||||
NumSplit++;
|
||||
|
||||
// Update the CFG. All succs of OrigBB are now succs of NewBB.
|
||||
while (!OrigBB->succ_empty()) {
|
||||
MachineBasicBlock *Succ = *OrigBB->succ_begin();
|
||||
OrigBB->removeSuccessor(Succ);
|
||||
NewBB->addSuccessor(Succ);
|
||||
|
||||
// This pass should be run after register allocation, so there should be no
|
||||
// PHI nodes to update.
|
||||
assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
|
||||
&& "PHI nodes should be eliminated by now!");
|
||||
}
|
||||
|
||||
// OrigBB branches to NewBB.
|
||||
OrigBB->addSuccessor(NewBB);
|
||||
|
||||
// Update internal data structures to account for the newly inserted MBB.
|
||||
UpdateForInsertedWaterBlock(NewBB);
|
||||
|
||||
// Figure out how large the first NewMBB is.
|
||||
unsigned NewBBSize = 0;
|
||||
for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
|
||||
I != E; ++I)
|
||||
NewBBSize += GetInstSize(I);
|
||||
|
||||
// Set the size of NewBB in BBSizes.
|
||||
BBSizes[NewBB->getNumber()] = NewBBSize;
|
||||
|
||||
// We removed instructions from UserMBB, subtract that off from its size.
|
||||
// Add 4 to the block to count the unconditional branch we added to it.
|
||||
BBSizes[OrigBB->getNumber()] -= NewBBSize-4;
|
||||
}
|
||||
|
||||
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
|
||||
/// is out-of-range. If so, pick it up the constant pool value and move it some
|
||||
/// place in-range.
|
||||
bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, CPUser &U){
|
||||
MachineInstr *UserMI = U.MI;
|
||||
MachineInstr *CPEMI = U.CPEMI;
|
||||
|
||||
unsigned UserOffset = GetOffsetOf(UserMI);
|
||||
unsigned CPEOffset = GetOffsetOf(CPEMI);
|
||||
|
||||
DEBUG(std::cerr << "User of CPE#" << CPEMI->getOperand(0).getImm()
|
||||
<< " max delta=" << U.MaxDisp
|
||||
<< " at offset " << int(UserOffset-CPEOffset) << "\t"
|
||||
<< *UserMI);
|
||||
|
||||
// Check to see if the CPE is already in-range.
|
||||
if (UserOffset < CPEOffset) {
|
||||
// User before the CPE.
|
||||
if (CPEOffset-UserOffset <= U.MaxDisp)
|
||||
return false;
|
||||
} else {
|
||||
if (UserOffset-CPEOffset <= U.MaxDisp)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Solution guaranteed to work: split the user's MBB right before the user and
|
||||
// insert a clone the CPE into the newly created water.
|
||||
|
||||
// If the user isn't at the start of its MBB, or if there is a fall-through
|
||||
// into the user's MBB, split the MBB before the User.
|
||||
MachineBasicBlock *UserMBB = UserMI->getParent();
|
||||
if (&UserMBB->front() != UserMI ||
|
||||
UserMBB == &Fn.front() || // entry MBB of function.
|
||||
BBHasFallthrough(prior(MachineFunction::iterator(UserMBB)))) {
|
||||
// TODO: Search for the best place to split the code. In practice, using
|
||||
// loop nesting information to insert these guys outside of loops would be
|
||||
// sufficient.
|
||||
SplitBlockBeforeInstr(UserMI);
|
||||
|
||||
// UserMI's BB may have changed.
|
||||
UserMBB = UserMI->getParent();
|
||||
}
|
||||
|
||||
// Okay, we know we can put an island before UserMBB now, do it!
|
||||
MachineBasicBlock *NewIsland = new MachineBasicBlock();
|
||||
Fn.getBasicBlockList().insert(UserMBB, NewIsland);
|
||||
|
||||
// Update internal data structures to account for the newly inserted MBB.
|
||||
UpdateForInsertedWaterBlock(NewIsland);
|
||||
|
||||
// Now that we have an island to add the CPE to, clone the original CPE and
|
||||
// add it to the island.
|
||||
unsigned ID = NextUID++;
|
||||
unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex();
|
||||
unsigned Size = CPEMI->getOperand(2).getImm();
|
||||
|
||||
// Build a new CPE for this user.
|
||||
U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY))
|
||||
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
|
||||
|
||||
// Increase the size of the island block to account for the new entry.
|
||||
BBSizes[NewIsland->getNumber()] += Size;
|
||||
|
||||
// Finally, change the CPI in the instruction operand to be ID.
|
||||
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
|
||||
if (UserMI->getOperand(i).isConstantPoolIndex()) {
|
||||
UserMI->getOperand(i).setConstantPoolIndex(ID);
|
||||
break;
|
||||
}
|
||||
|
||||
DEBUG(std::cerr << " Moved CPE to #" << ID << " CPI=" << CPI << "\t"
|
||||
<< *UserMI);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
55
lib/Target/ARM/ARMConstantPoolValue.cpp
Normal file
55
lib/Target/ARM/ARMConstantPoolValue.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM specific constantpool value class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMConstantPoolValue.h"
|
||||
#include "llvm/ADT/FoldingSet.h"
|
||||
#include "llvm/GlobalValue.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// Construct a constant pool value that refers to global value gv.  The type
/// handed to the MachineConstantPoolValue base is the type of gv itself; the
/// remaining arguments are stored unchanged in the corresponding members.
ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
                                           unsigned id,
                                           bool isNonLazy,
                                           unsigned char PCAdj)
  : MachineConstantPoolValue((const Type*)gv->getType()),
    GV(gv),
    LabelId(id),
    isNonLazyPtr(isNonLazy),
    PCAdjust(PCAdj) {
}
|
||||
|
||||
/// getExistingMachineCPValue - Search constant pool CP for an existing
/// target-specific entry that is equivalent to this value and whose offset
/// is aligned to (1 << Alignment) bytes.
///
/// Two values are equivalent when they agree on the global value, the label
/// id, the non-lazy-pointer flag and the PC adjustment, i.e. on the same
/// fields that AddSelectionDAGCSEId hashes.
///
/// Returns the index of the matching entry, or -1 if there is none.
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
                                                    unsigned Alignment) {
  unsigned AlignMask = (1 << Alignment)-1;

  // Bind by reference: the list is only inspected, so there is no reason to
  // copy the whole vector on every lookup.
  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();

  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
    // Only suitably aligned target-specific entries are candidates.
    if (!Constants[i].isMachineConstantPoolEntry() ||
        (Constants[i].Offset & AlignMask) != 0)
      continue;

    // NOTE(review): as in the original code, every machine constant pool
    // entry is assumed to be an ARMConstantPoolValue.
    ARMConstantPoolValue *CPV =
      static_cast<ARMConstantPoolValue*>(Constants[i].Val.MachineCPVal);
    if (CPV->GV == GV &&
        CPV->LabelId == LabelId &&
        CPV->isNonLazyPtr == isNonLazyPtr &&
        CPV->PCAdjust == PCAdjust)
      return i;
  }

  return -1;
}
|
||||
|
||||
void
|
||||
ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
|
||||
ID.AddPointer(GV);
|
||||
ID.AddInteger(LabelId);
|
||||
ID.AddInteger((unsigned)isNonLazyPtr);
|
||||
ID.AddInteger(PCAdjust);
|
||||
}
|
||||
|
||||
void ARMConstantPoolValue::print(std::ostream &O) const {
|
||||
O << GV->getName();
|
||||
if (isNonLazyPtr) O << "$non_lazy_ptr";
|
||||
if (PCAdjust != 0) O << "-(LPIC" << LabelId << "+"
|
||||
<< (unsigned)PCAdjust << ")";
|
||||
}
|
50
lib/Target/ARM/ARMConstantPoolValue.h
Normal file
50
lib/Target/ARM/ARMConstantPoolValue.h
Normal file
@ -0,0 +1,50 @@
|
||||
//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the ARM specific constantpool value class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
|
||||
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
|
||||
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
|
||||
/// represent PC relative displacement between the address of the load
|
||||
/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
|
||||
class ARMConstantPoolValue : public MachineConstantPoolValue {
|
||||
GlobalValue *GV; // GlobalValue being loaded.
|
||||
unsigned LabelId; // Label id of the load.
|
||||
bool isNonLazyPtr; // True if loading a Mac OS X non_lazy_ptr stub.
|
||||
unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
|
||||
// 8 for ARM, 4 for Thumb.
|
||||
|
||||
public:
|
||||
ARMConstantPoolValue(GlobalValue *gv, unsigned id, bool isNonLazy = false,
|
||||
unsigned char PCAdj = 0);
|
||||
|
||||
GlobalValue *getGV() const { return GV; }
|
||||
unsigned getLabelId() const { return LabelId; }
|
||||
bool isNonLazyPointer() const { return isNonLazyPtr; }
|
||||
unsigned char getPCAdjustment() const { return PCAdjust; }
|
||||
|
||||
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
|
||||
unsigned Alignment);
|
||||
|
||||
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
|
||||
|
||||
virtual void print(std::ostream &O) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -17,17 +17,15 @@
|
||||
|
||||
#include "ARM.h"
|
||||
#include "llvm/Target/TargetFrameInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "ARMSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARMFrameInfo: public TargetFrameInfo {
|
||||
|
||||
class ARMFrameInfo : public TargetFrameInfo {
|
||||
public:
|
||||
ARMFrameInfo()
|
||||
: TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
|
||||
ARMFrameInfo(const ARMSubtarget &ST)
|
||||
: TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
File diff suppressed because it is too large
Load Diff
1414
lib/Target/ARM/ARMISelLowering.cpp
Normal file
1414
lib/Target/ARM/ARMISelLowering.cpp
Normal file
File diff suppressed because it is too large
Load Diff
134
lib/Target/ARM/ARMISelLowering.h
Normal file
134
lib/Target/ARM/ARMISelLowering.h
Normal file
@ -0,0 +1,134 @@
|
||||
//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under
|
||||
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the interfaces that ARM uses to lower LLVM code into a
|
||||
// selection DAG.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARMISELLOWERING_H
|
||||
#define ARMISELLOWERING_H
|
||||
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
class ARMConstantPoolValue;
|
||||
class ARMSubtarget;
|
||||
|
||||
namespace ARMISD {
|
||||
// ARM Specific DAG Nodes
|
||||
enum NodeType {
|
||||
// Start the numbering where the builting ops and target ops leave off.
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
|
||||
|
||||
Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
|
||||
// TargetExternalSymbol, and TargetGlobalAddress.
|
||||
WrapperCall, // WrapperCall - Same as wrapper, but mark the wrapped
|
||||
// node as call operand.
|
||||
WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
|
||||
|
||||
CALL, // Function call.
|
||||
CALL_NOLINK, // Function call with branch not branch-and-link.
|
||||
tCALL, // Thumb function call.
|
||||
BRCOND, // Conditional branch.
|
||||
BR_JT, // Jumptable branch.
|
||||
RET_FLAG, // Return with a flag operand.
|
||||
|
||||
PIC_ADD, // Add with a PC operand and a PIC label.
|
||||
|
||||
CMP, // ARM compare instructions.
|
||||
CMPFP, // ARM VFP compare instruction, sets FPSCR.
|
||||
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
|
||||
FMSTAT, // ARM fmstat instruction.
|
||||
CMOV, // ARM conditional move instructions.
|
||||
CNEG, // ARM conditional negate instructions.
|
||||
|
||||
FTOSI, // FP to sint within a FP register.
|
||||
FTOUI, // FP to uint within a FP register.
|
||||
SITOF, // sint to FP within a FP register.
|
||||
UITOF, // uint to FP within a FP register.
|
||||
|
||||
MULHILOU, // Lo,Hi = umul LHS, RHS.
|
||||
MULHILOS, // Lo,Hi = smul LHS, RHS.
|
||||
|
||||
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
|
||||
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
|
||||
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
|
||||
|
||||
FMRRD, // double to two gprs.
|
||||
FMDRR // Two gprs to double.
|
||||
};
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARMTargetLowering - X86 Implementation of the TargetLowering interface
|
||||
|
||||
class ARMTargetLowering : public TargetLowering {
|
||||
int VarArgsFrameIndex; // FrameIndex for start of varargs area.
|
||||
public:
|
||||
ARMTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
|
||||
virtual const char *getTargetNodeName(unsigned Opcode) const;
|
||||
|
||||
virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
|
||||
MachineBasicBlock *MBB);
|
||||
|
||||
/// isLegalAddressImmediate - Return true if the integer value or
|
||||
/// GlobalValue can be used as the offset of the target addressing mode.
|
||||
virtual bool isLegalAddressImmediate(int64_t V) const;
|
||||
virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
|
||||
|
||||
/// getPreIndexedAddressParts - returns true by value, base pointer and
|
||||
/// offset pointer and addressing mode by reference if the node's address
|
||||
/// can be legally represented as pre-indexed load / store address.
|
||||
virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
|
||||
SDOperand &Offset,
|
||||
ISD::MemIndexedMode &AM,
|
||||
SelectionDAG &DAG);
|
||||
|
||||
/// getPostIndexedAddressParts - returns true by value, base pointer and
|
||||
/// offset pointer and addressing mode by reference if this node can be
|
||||
/// combined with a load / store to form a post-indexed load / store.
|
||||
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
|
||||
SDOperand &Base, SDOperand &Offset,
|
||||
ISD::MemIndexedMode &AM,
|
||||
SelectionDAG &DAG);
|
||||
|
||||
virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
|
||||
uint64_t Mask,
|
||||
uint64_t &KnownZero,
|
||||
uint64_t &KnownOne,
|
||||
unsigned Depth) const;
|
||||
ConstraintType getConstraintType(char ConstraintLetter) const;
|
||||
std::pair<unsigned, const TargetRegisterClass*>
|
||||
getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
MVT::ValueType VT) const;
|
||||
std::vector<unsigned>
|
||||
getRegClassForInlineAsmConstraint(const std::string &Constraint,
|
||||
MVT::ValueType VT) const;
|
||||
private:
|
||||
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const ARMSubtarget *Subtarget;
|
||||
|
||||
/// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
|
||||
///
|
||||
unsigned ARMPCLabelIndex;
|
||||
|
||||
SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ARMISELLOWERING_H
|
@ -14,46 +14,409 @@
|
||||
|
||||
#include "ARMInstrInfo.h"
|
||||
#include "ARM.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "ARMAddressingModes.h"
|
||||
#include "ARMGenInstrInfo.inc"
|
||||
#include "ARMMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/LiveVariables.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
ARMInstrInfo::ARMInstrInfo()
|
||||
static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
|
||||
cl::desc("Enable ARM 2-addr to 3-addr conv"));
|
||||
|
||||
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
|
||||
: TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])),
|
||||
RI(*this) {
|
||||
RI(*this, STI) {
|
||||
}
|
||||
|
||||
unsigned ARMInstrInfo::getDWARF_LABELOpcode() const {
|
||||
return ARM::DWARF_LABEL;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const {
|
||||
return &ARM::IntRegsRegClass;
|
||||
return &ARM::GPRRegClass;
|
||||
}
|
||||
|
||||
/// Return true if the instruction is a register to register move and
|
||||
/// leave the source and dest operands in the passed parameters.
|
||||
///
|
||||
bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg) const {
|
||||
unsigned &SrcReg, unsigned &DstReg) const {
|
||||
MachineOpCode oc = MI.getOpcode();
|
||||
switch (oc) {
|
||||
case ARM::MOV: {
|
||||
assert(MI.getNumOperands() == 4 &&
|
||||
MI.getOperand(0).isRegister() &&
|
||||
default:
|
||||
return false;
|
||||
case ARM::FCPYS:
|
||||
case ARM::FCPYD:
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
return true;
|
||||
case ARM::MOVrr:
|
||||
case ARM::tMOVrr:
|
||||
assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() &&
|
||||
MI.getOperand(1).isRegister() &&
|
||||
"Invalid ARM MOV instruction");
|
||||
const MachineOperand &Arg = MI.getOperand(1);
|
||||
const MachineOperand &Shift = MI.getOperand(2);
|
||||
if (Arg.isRegister() && Shift.isImmediate() && Shift.getImmedValue() == 0) {
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
return true;
|
||||
}
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const std::vector<MachineOperand> &Cond)const{
|
||||
// Can only insert uncond branches so far.
|
||||
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
|
||||
BuildMI(&MBB, get(ARM::b)).addMBB(TBB);
|
||||
/// isLoadFromStackSlot - If MI is one of the recognised loads (LDR, FLDD,
/// FLDS, tLDRspi) whose operand 1 is a frame index and whose remaining
/// address operands are all zero, store that frame index in FrameIndex and
/// return the register in operand 0.  Otherwise return 0 and leave
/// FrameIndex untouched.
unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI,
                                           int &FrameIndex) const {
  bool IsPlainSlotLoad = false;

  switch (MI->getOpcode()) {
  case ARM::LDR:
    // Operands: dst, frame index, offset register (must be 0), offset
    // immediate (must be 0).
    IsPlainSlotLoad = MI->getOperand(1).isFrameIndex() &&
                      MI->getOperand(2).isReg() &&
                      MI->getOperand(3).isImmediate() &&
                      MI->getOperand(2).getReg() == 0 &&
                      MI->getOperand(3).getImmedValue() == 0;
    break;
  case ARM::FLDD:
  case ARM::FLDS:
  case ARM::tLDRspi:
    // Operands: dst, frame index, offset immediate (must be 0).
    IsPlainSlotLoad = MI->getOperand(1).isFrameIndex() &&
                      MI->getOperand(2).isImmediate() &&
                      MI->getOperand(2).getImmedValue() == 0;
    break;
  default:
    break;
  }

  if (!IsPlainSlotLoad)
    return 0;

  FrameIndex = MI->getOperand(1).getFrameIndex();
  return MI->getOperand(0).getReg();
}
|
||||
|
||||
/// isStoreToStackSlot - If MI is one of the recognised stores (STR, FSTD,
/// FSTS, tSTRspi) whose operand 1 is a frame index and whose remaining
/// address operands are all zero, store that frame index in FrameIndex and
/// return the register in operand 0.  Otherwise return 0 and leave
/// FrameIndex untouched.
unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI,
                                          int &FrameIndex) const {
  bool IsPlainSlotStore = false;

  switch (MI->getOpcode()) {
  case ARM::STR:
    // Operands: src, frame index, offset register (must be 0), offset
    // immediate (must be 0).
    IsPlainSlotStore = MI->getOperand(1).isFrameIndex() &&
                       MI->getOperand(2).isReg() &&
                       MI->getOperand(3).isImmediate() &&
                       MI->getOperand(2).getReg() == 0 &&
                       MI->getOperand(3).getImmedValue() == 0;
    break;
  case ARM::FSTD:
  case ARM::FSTS:
  case ARM::tSTRspi:
    // Operands: src, frame index, offset immediate (must be 0).
    IsPlainSlotStore = MI->getOperand(1).isFrameIndex() &&
                       MI->getOperand(2).isImmediate() &&
                       MI->getOperand(2).getImmedValue() == 0;
    break;
  default:
    break;
  }

  if (!IsPlainSlotStore)
    return 0;

  FrameIndex = MI->getOperand(1).getFrameIndex();
  return MI->getOperand(0).getReg();
}
|
||||
|
||||
static unsigned getUnindexedOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
case ARM::LDR_PRE:
|
||||
case ARM::LDR_POST:
|
||||
return ARM::LDR;
|
||||
case ARM::LDRH_PRE:
|
||||
case ARM::LDRH_POST:
|
||||
return ARM::LDRH;
|
||||
case ARM::LDRB_PRE:
|
||||
case ARM::LDRB_POST:
|
||||
return ARM::LDRB;
|
||||
case ARM::LDRSH_PRE:
|
||||
case ARM::LDRSH_POST:
|
||||
return ARM::LDRSH;
|
||||
case ARM::LDRSB_PRE:
|
||||
case ARM::LDRSB_POST:
|
||||
return ARM::LDRSB;
|
||||
case ARM::STR_PRE:
|
||||
case ARM::STR_POST:
|
||||
return ARM::STR;
|
||||
case ARM::STRH_PRE:
|
||||
case ARM::STRH_POST:
|
||||
return ARM::STRH;
|
||||
case ARM::STRB_PRE:
|
||||
case ARM::STRB_POST:
|
||||
return ARM::STRB;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// convertToThreeAddress - Split a pre- or post-indexed load / store into an
/// un-indexed memory instruction plus a separate add / sub that computes the
/// written-back base register.
///
/// Returns NULL without creating anything when EnableARM3Addr is false, when
/// the instruction is neither pre- nor post-indexed, when it has no un-indexed
/// opcode, or when an addressing-mode-2 immediate offset cannot be encoded as
/// a so_imm operand. On success the two new instructions are inserted before
/// MBBI, kill / dead / def information in LV is moved over to them, and the
/// instruction placed immediately before MBBI is returned. The original
/// instruction is not erased here.
MachineInstr *
ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables &LV) const {
  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *OrigMI = MBBI;
  const unsigned Flags = OrigMI->getInstrDescriptor()->TSFlags;

  // Only pre-indexed and post-indexed instructions are candidates.
  const unsigned IdxMode =
    (Flags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
  if (IdxMode != ARMII::IndexModePre && IdxMode != ARMII::IndexModePost)
    return NULL;
  const bool PreIndexed = (IdxMode == ARMII::IndexModePre);

  // Try splitting an indexed load / store into an un-indexed one plus an
  // add / sub operation.
  const unsigned PlainOpc = getUnindexedOpcode(OrigMI->getOpcode());
  if (PlainOpc == 0)
    return NULL;

  const unsigned OperandCount = OrigMI->getNumOperands();
  const bool IsLoad =
    (OrigMI->getInstrDescriptor()->Flags & M_LOAD_FLAG) != 0;

  // Loads carry the write-back register as operand 1, stores as operand 0.
  // The register offset and the packed addressing-mode immediate are the last
  // two operands.
  const MachineOperand &WB = OrigMI->getOperand(IsLoad ? 1 : 0);
  const unsigned WBReg = WB.getReg();
  const unsigned BaseReg = OrigMI->getOperand(2).getReg();
  const unsigned OffsetReg = OrigMI->getOperand(OperandCount - 2).getReg();
  const unsigned PackedOff = OrigMI->getOperand(OperandCount - 1).getImm();

  // Build the add / sub that produces the updated base register.
  MachineInstr *AddSubMI = NULL;
  switch (Flags & ARMII::AddrModeMask) {
  case ARMII::AddrMode2: {
    const bool Negate = ARM_AM::getAM2Op(PackedOff) == ARM_AM::sub;
    const unsigned Amount = ARM_AM::getAM2Offset(PackedOff);
    if (OffsetReg != 0) {
      if (Amount == 0) {
        // Plain register offset.
        AddSubMI = BuildMI(get(Negate ? ARM::SUBrr : ARM::ADDrr), WBReg)
          .addReg(BaseReg).addReg(OffsetReg);
      } else {
        // Shifted register offset.
        ARM_AM::ShiftOpc Shift = ARM_AM::getAM2ShiftOpc(PackedOff);
        unsigned ShiftedOpc = ARM_AM::getSORegOpc(Shift, Amount);
        AddSubMI = BuildMI(get(Negate ? ARM::SUBrs : ARM::ADDrs), WBReg)
          .addReg(BaseReg).addReg(OffsetReg).addReg(0).addImm(ShiftedOpc);
      }
    } else {
      // Immediate offset. If it can't be encoded in a so_imm operand the
      // transformation would need more than one extra instruction, so abandon
      // it.
      int Encoded = ARM_AM::getSOImmVal(Amount);
      if (Encoded == -1)
        return NULL;
      AddSubMI = BuildMI(get(Negate ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Encoded);
    }
    break;
  }
  case ARMII::AddrMode3: {
    const bool Negate = ARM_AM::getAM3Op(PackedOff) == ARM_AM::sub;
    const unsigned Amount = ARM_AM::getAM3Offset(PackedOff);
    if (OffsetReg != 0)
      AddSubMI = BuildMI(get(Negate ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffsetReg);
    else
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      AddSubMI = BuildMI(get(Negate ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amount);
    break;
  }
  default:
    assert(false && "Unknown indexed op!");
    return NULL;
  }

  // Build the un-indexed memory access. A pre-indexed access addresses through
  // the already-updated register; a post-indexed one uses the original base.
  const unsigned AddrReg = PreIndexed ? WBReg : BaseReg;
  MachineInstr *PlainMI = NULL;
  if (IsLoad)
    PlainMI = BuildMI(get(PlainOpc), OrigMI->getOperand(0).getReg())
      .addReg(AddrReg).addReg(0).addImm(0);
  else
    PlainMI = BuildMI(get(PlainOpc)).addReg(OrigMI->getOperand(1).getReg())
      .addReg(AddrReg).addReg(0).addImm(0);

  if (!PreIndexed && WB.isDead())
    AddSubMI->getOperand(0).setIsDead();

  // The pair in reverse program order: element 0 is the instruction that ends
  // up immediately before MBBI, element 1 the one before that.
  MachineInstr *LastToFirst[2];
  LastToFirst[0] = PreIndexed ? PlainMI : AddSubMI;
  LastToFirst[1] = PreIndexed ? AddSubMI : PlainMI;

  // Transfer LiveVariables states, kill / dead info.
  for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = OrigMI->getOperand(i);
    if (!MO.isRegister() || !MO.getReg() ||
        !MRegisterInfo::isVirtualRegister(MO.getReg()))
      continue;

    const unsigned Reg = MO.getReg();
    LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);

    if (MO.isDef()) {
      // The write-back register is now defined by the add / sub; every other
      // def belongs to the memory instruction.
      MachineInstr *Definer = (Reg == WBReg) ? AddSubMI : PlainMI;
      if (MO.isDead())
        LV.addVirtualRegisterDead(Reg, Definer);
      // Update the defining instruction.
      if (VI.DefInst == OrigMI)
        VI.DefInst = Definer;
    }

    if (MO.isUse() && MO.isKill()) {
      // Scan the two new instructions in reverse order so the kill lands on
      // the last one that reads the register.
      MachineInstr *LastUser = NULL;
      for (unsigned j = 0; j < 2; ++j) {
        if (LastToFirst[j]->findRegisterUseOperand(Reg)) {
          LastUser = LastToFirst[j];
          break;
        }
      }
      if (LastUser) {
        LV.addVirtualRegisterKilled(Reg, LastUser);
        if (VI.removeKill(OrigMI))
          VI.Kills.push_back(LastUser);
      }
    }
  }

  MFI->insert(MBBI, LastToFirst[1]);
  MFI->insert(MBBI, LastToFirst[0]);
  return LastToFirst[0];
}
|
||||
|
||||
// Branch analysis.
|
||||
/// AnalyzeBranch - Inspect the terminators at the end of MBB.
///
/// Returns false when the block's ending is understood and true when it is
/// not. On a false return:
///  - no terminator: TBB, FBB and Cond are left untouched (pure fall-through);
///  - single B / tB: TBB is the branch target;
///  - single Bcc / tBcc: TBB is the target and Cond receives the branch's
///    operand 1;
///  - Bcc followed by B (or tBcc followed by tB): TBB / Cond describe the
///    conditional branch and FBB is the unconditional target.
bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 std::vector<MachineOperand> &Cond) const {
  // An empty block, or one that does not end in a terminator, just falls into
  // the block after it.
  MachineBasicBlock::iterator Pos = MBB.end();
  if (Pos == MBB.begin())
    return false;
  --Pos;
  if (!isTerminatorInstr(Pos->getOpcode()))
    return false;

  // The last instruction in the block.
  MachineInstr *Final = Pos;
  const unsigned FinalOpc = Final->getOpcode();

  // Step back once more to see whether a second terminator precedes it.
  bool HasSecondTerminator = false;
  if (Pos != MBB.begin()) {
    --Pos;
    HasSecondTerminator = isTerminatorInstr(Pos->getOpcode());
  }

  // Exactly one terminator: process it on its own.
  if (!HasSecondTerminator) {
    const bool IsUncond = (FinalOpc == ARM::B || FinalOpc == ARM::tB);
    const bool IsCond = (FinalOpc == ARM::Bcc || FinalOpc == ARM::tBcc);
    if (!IsUncond && !IsCond)
      return true;  // Can't handle indirect branch.

    TBB = Final->getOperand(0).getMachineBasicBlock();
    if (!IsUncond) {
      // Block ends with fall-through condbranch.
      Cond.push_back(Final->getOperand(1));
    }
    return false;
  }

  // Pos now refers to the terminator just before the last one.
  MachineInstr *Penultimate = Pos;

  // If there are three terminators, we don't know what sort of block this is.
  if (Penultimate && Pos != MBB.begin()) {
    --Pos;
    if (isTerminatorInstr(Pos->getOpcode()))
      return true;
  }

  // The only two-terminator shape handled is ARM::Bcc + ARM::B, or the Thumb
  // equivalent ARM::tBcc + ARM::tB.
  const unsigned PenultimateOpc = Penultimate->getOpcode();
  const bool ARMPair = (PenultimateOpc == ARM::Bcc && FinalOpc == ARM::B);
  const bool ThumbPair = (PenultimateOpc == ARM::tBcc && FinalOpc == ARM::tB);
  if (!ARMPair && !ThumbPair)
    return true;  // Otherwise, can't handle this.

  TBB = Penultimate->getOperand(0).getMachineBasicBlock();
  Cond.push_back(Penultimate->getOperand(1));
  FBB = Final->getOperand(0).getMachineBasicBlock();
  return false;
}
|
||||
|
||||
|
||||
/// RemoveBranch - Strip the branch instructions from the end of MBB.
///
/// If the block ends in an unconditional or conditional branch (ARM or Thumb
/// flavour, chosen by whether the function is a Thumb function), that branch
/// is erased. If the instruction that is then last is a conditional branch,
/// it is erased as well. Blocks ending in anything else are left unchanged.
void ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineFunction &Fn = *MBB.getParent();
  ARMFunctionInfo *FnInfo = Fn.getInfo<ARMFunctionInfo>();
  int UncondOpc = FnInfo->isThumbFunction() ? ARM::tB : ARM::B;
  int CondOpc = FnInfo->isThumbFunction() ? ARM::tBcc : ARM::Bcc;

  if (MBB.empty())
    return;

  // The trailing instruction must be one of the two branch forms.
  MachineInstr &Last = MBB.back();
  if (Last.getOpcode() != UncondOpc && Last.getOpcode() != CondOpc)
    return;

  // Remove the branch.
  Last.eraseFromParent();

  if (MBB.empty())
    return;

  // Only a conditional branch may precede the one just removed.
  MachineInstr &Prev = MBB.back();
  if (Prev.getOpcode() != CondOpc)
    return;

  // Remove the branch.
  Prev.eraseFromParent();
}
|
||||
|
||||
/// InsertBranch - Append branch instructions to the end of MBB.
///
/// \param TBB  Target of the (first) branch; must not be null.
/// \param FBB  If non-null, an unconditional branch to FBB is appended after
///             a conditional branch to TBB (two-way branch).
/// \param Cond Either empty (unconditional branch to TBB) or a single operand
///             whose immediate is the condition code for the branch to TBB.
///
/// The ARM or Thumb opcodes are selected according to whether the enclosing
/// function is a Thumb function.
void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
                                const std::vector<MachineOperand> &Cond) const {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  // A condition, when present, is exactly one operand (the condition code).
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "ARM branch conditions have one component!");

  if (FBB == 0) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, get(BOpc)).addMBB(TBB);
    else
      BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
    return;
  }

  // Two-way conditional branch. Cond[0] is read below, so it must exist.
  assert(!Cond.empty() && "Two-way branch requires a condition!");
  BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
  BuildMI(&MBB, get(BOpc)).addMBB(FBB);
}
|
||||
|
||||
/// BlockHasNoFallThrough - Return true if the last instruction of MBB is an
/// unconditional branch or one of the jump-table branches. An empty block, or
/// a block ending in any other instruction, returns false.
bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
  if (MBB.empty())
    return false;

  const unsigned LastOpc = MBB.back().getOpcode();
  return LastOpc == ARM::B ||        // Uncond branch.
         LastOpc == ARM::tB ||       // Uncond branch.
         LastOpc == ARM::BR_JTr ||   // Jumptable branch.
         LastOpc == ARM::BR_JTm ||   // Jumptable branch through mem.
         LastOpc == ARM::BR_JTadd;   // Jumptable branch add to pc.
}
|
||||
|
||||
bool ARMInstrInfo::
|
||||
ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
|
||||
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
|
||||
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
|
||||
return false;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===- ARMInstrInfo.h - ARM Instruction Information --------------*- C++ -*-===//
|
||||
//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -19,11 +19,56 @@
|
||||
#include "ARMRegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
class ARMSubtarget;
|
||||
|
||||
/// ARMII - This namespace holds all of the target specific flags that
|
||||
/// instruction info tracks.
|
||||
///
|
||||
namespace ARMII {
|
||||
enum {
|
||||
//===------------------------------------------------------------------===//
|
||||
// Instruction Flags.
|
||||
|
||||
//===------------------------------------------------------------------===//
|
||||
// This four-bit field describes the addressing mode used. Zero is unused
|
||||
// so that we can tell if we forgot to set a value.
|
||||
|
||||
AddrModeMask = 0xf,
|
||||
AddrMode1 = 1,
|
||||
AddrMode2 = 2,
|
||||
AddrMode3 = 3,
|
||||
AddrMode4 = 4,
|
||||
AddrMode5 = 5,
|
||||
AddrModeT1 = 6,
|
||||
AddrModeT2 = 7,
|
||||
AddrModeT4 = 8,
|
||||
AddrModeTs = 9, // i8 * 4 for pc and sp relative data
|
||||
|
||||
// Size* - Flags to keep track of the size of an instruction.
|
||||
SizeShift = 4,
|
||||
SizeMask = 7 << SizeShift,
|
||||
SizeSpecial = 1, // 0 byte pseudo or special case.
|
||||
Size8Bytes = 2,
|
||||
Size4Bytes = 3,
|
||||
Size2Bytes = 4,
|
||||
|
||||
// IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
|
||||
// and store ops
|
||||
IndexModeShift = 7,
|
||||
IndexModeMask = 3 << IndexModeShift,
|
||||
IndexModePre = 1,
|
||||
IndexModePost = 2,
|
||||
|
||||
// Opcode
|
||||
OpcodeShift = 9,
|
||||
OpcodeMask = 0xf << OpcodeShift
|
||||
};
|
||||
}
|
||||
|
||||
class ARMInstrInfo : public TargetInstrInfo {
|
||||
const ARMRegisterInfo RI;
|
||||
public:
|
||||
ARMInstrInfo();
|
||||
ARMInstrInfo(const ARMSubtarget &STI);
|
||||
|
||||
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
|
||||
/// such, whenever a client has an instance of instruction info, it should
|
||||
@ -35,15 +80,33 @@ public:
|
||||
/// This is used for addressing modes.
|
||||
virtual const TargetRegisterClass *getPointerRegClass() const;
|
||||
|
||||
/// getDWARF_LABELOpcode - Return the opcode of the target's DWARF_LABEL
|
||||
/// instruction if it has one. This is used by codegen passes that update
|
||||
/// DWARF line number info as they modify the code.
|
||||
virtual unsigned getDWARF_LABELOpcode() const;
|
||||
|
||||
/// Return true if the instruction is a register to register move and
|
||||
/// leave the source and dest operands in the passed parameters.
|
||||
///
|
||||
virtual bool isMoveInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg) const;
|
||||
virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
|
||||
virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
|
||||
|
||||
virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
LiveVariables &LV) const;
|
||||
|
||||
// Branch analysis.
|
||||
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
std::vector<MachineOperand> &Cond) const;
|
||||
virtual void RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
virtual void InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const std::vector<MachineOperand> &Cond) const;
|
||||
virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
|
||||
virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
513
lib/Target/ARM/ARMInstrThumb.td
Normal file
513
lib/Target/ARM/ARMInstrThumb.td
Normal file
@ -0,0 +1,513 @@
|
||||
//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Chris Lattner and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes the Thumb instruction set.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Thumb specific DAG Nodes.
|
||||
//
|
||||
|
||||
def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
|
||||
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
|
||||
|
||||
// TI - Thumb instruction.
|
||||
|
||||
// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
|
||||
class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
|
||||
list<Predicate> Predicates = [IsThumb];
|
||||
}
|
||||
|
||||
class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
|
||||
list<Predicate> Predicates = [IsThumb, HasV5T];
|
||||
}
|
||||
|
||||
class ThumbI<dag ops, AddrMode am, SizeFlagVal sz,
|
||||
string asm, string cstr, list<dag> pattern>
|
||||
// FIXME: Set all opcodes to 0 for now.
|
||||
: InstARM<0, am, sz, IndexModeNone, ops, asm, cstr> {
|
||||
let Pattern = pattern;
|
||||
list<Predicate> Predicates = [IsThumb];
|
||||
}
|
||||
|
||||
class TI<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>;
|
||||
class TI1<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>;
|
||||
class TI2<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>;
|
||||
class TI4<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>;
|
||||
class TIs<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>;
|
||||
|
||||
// Two-address instructions
|
||||
class TIt<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
|
||||
|
||||
// BL, BLX(1) are translated by assembler into two instructions
|
||||
class TIx2<dag ops, string asm, list<dag> pattern>
|
||||
: ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>;
|
||||
|
||||
def imm_neg_XFORM : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32);
|
||||
}]>;
|
||||
def imm_comp_XFORM : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32);
|
||||
}]>;
|
||||
|
||||
|
||||
/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
|
||||
def imm0_7 : PatLeaf<(i32 imm), [{
|
||||
return (uint32_t)N->getValue() < 8;
|
||||
}]>;
|
||||
def imm0_7_neg : PatLeaf<(i32 imm), [{
|
||||
return (uint32_t)-N->getValue() < 8;
|
||||
}], imm_neg_XFORM>;
|
||||
|
||||
def imm0_255 : PatLeaf<(i32 imm), [{
|
||||
return (uint32_t)N->getValue() < 256;
|
||||
}]>;
|
||||
def imm0_255_comp : PatLeaf<(i32 imm), [{
|
||||
return ~((uint32_t)N->getValue()) < 256;
|
||||
}]>;
|
||||
|
||||
def imm8_255 : PatLeaf<(i32 imm), [{
|
||||
return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256;
|
||||
}]>;
|
||||
def imm8_255_neg : PatLeaf<(i32 imm), [{
|
||||
unsigned Val = -N->getValue();
|
||||
return Val >= 8 && Val < 256;
|
||||
}], imm_neg_XFORM>;
|
||||
|
||||
// Break imm's up into two pieces: an immediate + a left shift.
|
||||
// This uses thumb_immshifted to match and thumb_immshifted_val and
|
||||
// thumb_immshifted_shamt to get the val/shift pieces.
|
||||
def thumb_immshifted : PatLeaf<(imm), [{
|
||||
return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue());
|
||||
}]>;
|
||||
|
||||
def thumb_immshifted_val : SDNodeXForm<imm, [{
|
||||
unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue());
|
||||
return CurDAG->getTargetConstant(V, MVT::i32);
|
||||
}]>;
|
||||
|
||||
def thumb_immshifted_shamt : SDNodeXForm<imm, [{
|
||||
unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue());
|
||||
return CurDAG->getTargetConstant(V, MVT::i32);
|
||||
}]>;
|
||||
|
||||
// Define Thumb specific addressing modes.
|
||||
|
||||
// t_addrmode_rr := reg + reg
|
||||
//
|
||||
def t_addrmode_rr : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
|
||||
let PrintMethod = "printThumbAddrModeRROperand";
|
||||
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
|
||||
}
|
||||
|
||||
// t_addrmode_ri5_{1|2|4} := reg + imm5 * {1|2|4}
|
||||
//
|
||||
def t_addrmode_ri5_1 : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_1", []> {
|
||||
let PrintMethod = "printThumbAddrModeRI5_1Operand";
|
||||
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
|
||||
}
|
||||
def t_addrmode_ri5_2 : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_2", []> {
|
||||
let PrintMethod = "printThumbAddrModeRI5_2Operand";
|
||||
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
|
||||
}
|
||||
def t_addrmode_ri5_4 : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_4", []> {
|
||||
let PrintMethod = "printThumbAddrModeRI5_4Operand";
|
||||
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
|
||||
}
|
||||
|
||||
// t_addrmode_sp := sp + imm8 * 4
|
||||
//
|
||||
def t_addrmode_sp : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
|
||||
let PrintMethod = "printThumbAddrModeSPOperand";
|
||||
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Miscellaneous Instructions.
|
||||
//
|
||||
|
||||
def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
|
||||
"\n$cp:\n\tadd $dst, pc",
|
||||
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Control Flow Instructions.
|
||||
//
|
||||
|
||||
let isReturn = 1, isTerminator = 1 in
|
||||
def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
|
||||
|
||||
// FIXME: remove when we have a way to marking a MI with these properties.
|
||||
let isLoad = 1, isReturn = 1, isTerminator = 1 in
|
||||
def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
|
||||
"pop $dst1", []>;
|
||||
|
||||
let isCall = 1, noResults = 1,
|
||||
Defs = [R0, R1, R2, R3, LR,
|
||||
D0, D1, D2, D3, D4, D5, D6, D7] in {
|
||||
def tBL : TIx2<(ops i32imm:$func, variable_ops),
|
||||
"bl ${func:call}",
|
||||
[(ARMtcall tglobaladdr:$func)]>;
|
||||
// ARMv5T and above
|
||||
def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
|
||||
"blx ${func:call}",
|
||||
[(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
|
||||
def tBLXr : TI<(ops GPR:$dst, variable_ops),
|
||||
"blx $dst",
|
||||
[(ARMtcall GPR:$dst)]>, Requires<[HasV5T]>;
|
||||
// ARMv4T
|
||||
def tBX : TIx2<(ops GPR:$dst, variable_ops),
|
||||
"cpy lr, pc\n\tbx $dst",
|
||||
[(ARMcall_nolink GPR:$dst)]>;
|
||||
}
|
||||
|
||||
let isBranch = 1, isTerminator = 1, isBarrier = 1 in
|
||||
def tB : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
|
||||
|
||||
let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
|
||||
def tBcc : TI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
|
||||
[(ARMbrcond bb:$dst, imm:$cc)]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load Store Instructions.
|
||||
//
|
||||
|
||||
let isLoad = 1 in {
|
||||
def tLDRri : TI4<(ops GPR:$dst, t_addrmode_ri5_4:$addr),
|
||||
"ldr $dst, $addr",
|
||||
[(set GPR:$dst, (load t_addrmode_ri5_4:$addr))]>;
|
||||
|
||||
def tLDRrr : TI<(ops GPR:$dst, t_addrmode_rr:$addr),
|
||||
"ldr $dst, $addr",
|
||||
[(set GPR:$dst, (load t_addrmode_rr:$addr))]>;
|
||||
// def tLDRpci
|
||||
def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
|
||||
"ldr $dst, $addr",
|
||||
[(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
|
||||
|
||||
def tLDRBri : TI1<(ops GPR:$dst, t_addrmode_ri5_1:$addr),
|
||||
"ldrb $dst, $addr",
|
||||
[(set GPR:$dst, (zextloadi8 t_addrmode_ri5_1:$addr))]>;
|
||||
|
||||
def tLDRBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
|
||||
"ldrb $dst, $addr",
|
||||
[(set GPR:$dst, (zextloadi8 t_addrmode_rr:$addr))]>;
|
||||
|
||||
def tLDRHri : TI2<(ops GPR:$dst, t_addrmode_ri5_2:$addr),
|
||||
"ldrh $dst, $addr",
|
||||
[(set GPR:$dst, (zextloadi16 t_addrmode_ri5_2:$addr))]>;
|
||||
|
||||
def tLDRHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
|
||||
"ldrh $dst, $addr",
|
||||
[(set GPR:$dst, (zextloadi16 t_addrmode_rr:$addr))]>;
|
||||
|
||||
def tLDRSBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
|
||||
"ldrsb $dst, $addr",
|
||||
[(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
|
||||
|
||||
def tLDRSHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
|
||||
"ldrsh $dst, $addr",
|
||||
[(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
|
||||
} // isLoad
|
||||
|
||||
let isStore = 1 in {
|
||||
def tSTRri : TI4<(ops GPR:$src, t_addrmode_ri5_4:$addr),
|
||||
"str $src, $addr",
|
||||
[(store GPR:$src, t_addrmode_ri5_4:$addr)]>;
|
||||
|
||||
def tSTRrr : TI<(ops GPR:$src, t_addrmode_rr:$addr),
|
||||
"str $src, $addr",
|
||||
[(store GPR:$src, t_addrmode_rr:$addr)]>;
|
||||
|
||||
def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
|
||||
"str $src, $addr",
|
||||
[(store GPR:$src, t_addrmode_sp:$addr)]>;
|
||||
|
||||
def tSTRBri : TI1<(ops GPR:$src, t_addrmode_ri5_1:$addr),
|
||||
"strb $src, $addr",
|
||||
[(truncstorei8 GPR:$src, t_addrmode_ri5_1:$addr)]>;
|
||||
|
||||
def tSTRBrr : TI1<(ops GPR:$src, t_addrmode_rr:$addr),
|
||||
"strb $src, $addr",
|
||||
[(truncstorei8 GPR:$src, t_addrmode_rr:$addr)]>;
|
||||
|
||||
// tSTRHri - Halfword store with reg + imm5 * 2 addressing. The selection
// pattern uses the same halfword-scaled addressing mode as the operand list.
def tSTRHri : TI2<(ops GPR:$src, t_addrmode_ri5_2:$addr),
                  "strh $src, $addr",
                  [(truncstorei16 GPR:$src, t_addrmode_ri5_2:$addr)]>;
|
||||
|
||||
def tSTRHrr : TI2<(ops GPR:$src, t_addrmode_rr:$addr),
|
||||
"strh $src, $addr",
|
||||
[(truncstorei16 GPR:$src, t_addrmode_rr:$addr)]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load / store multiple Instructions.
|
||||
//
|
||||
|
||||
// TODO: A7-44: LDMIA - load multiple
|
||||
|
||||
let isLoad = 1 in
|
||||
def tPOP : TI<(ops reglist:$dst1, variable_ops),
|
||||
"pop $dst1", []>;
|
||||
|
||||
let isStore = 1 in
|
||||
def tPUSH : TI<(ops reglist:$src1, variable_ops),
|
||||
"push $src1", []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Arithmetic Instructions.
|
||||
//
|
||||
|
||||
def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"add $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>;
|
||||
|
||||
def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"add $dst, $rhs",
|
||||
[(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>;
|
||||
|
||||
def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"add $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"add $dst, $rhs", []>;
|
||||
|
||||
def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs),
|
||||
"add $dst, pc, $rhs * 4", []>;
|
||||
def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs),
|
||||
"add $dst, $sp, $rhs * 4", []>;
|
||||
def tADDspi : TI<(ops GPR:$sp, i32imm:$rhs),
|
||||
"add $sp, $rhs * 4", []>;
|
||||
|
||||
|
||||
def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"and $dst, $rhs",
|
||||
[(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"asr $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>;
|
||||
|
||||
def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"asr $dst, $rhs",
|
||||
[(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"bic $dst, $rhs",
|
||||
[(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>;
|
||||
|
||||
|
||||
def tCMN : TI<(ops GPR:$lhs, GPR:$rhs),
|
||||
"cmn $lhs, $rhs",
|
||||
[(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>;
|
||||
|
||||
def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
|
||||
"cmp $lhs, $rhs",
|
||||
[(ARMcmp GPR:$lhs, imm0_255:$rhs)]>;
|
||||
|
||||
def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs),
|
||||
"cmp $lhs, $rhs",
|
||||
[(ARMcmp GPR:$lhs, GPR:$rhs)]>;
|
||||
|
||||
// TODO: A7-37: CMP(3) - cmp hi regs
|
||||
|
||||
def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"eor $dst, $rhs",
|
||||
[(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"lsl $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>;
|
||||
|
||||
def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"lsl $dst, $rhs",
|
||||
[(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"lsr $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>;
|
||||
|
||||
def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"lsr $dst, $rhs",
|
||||
[(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tMOVri8 : TI<(ops GPR:$dst, i32imm:$src),
|
||||
"mov $dst, $src",
|
||||
[(set GPR:$dst, imm0_255:$src)]>;
|
||||
|
||||
// TODO: A7-73: MOV(2) - mov setting flag.
|
||||
|
||||
|
||||
// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
|
||||
// which is MOV(3). This also supports high registers.
|
||||
def tMOVrr : TI<(ops GPR:$dst, GPR:$src),
|
||||
"cpy $dst, $src", []>;
|
||||
|
||||
def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"mul $dst, $rhs",
|
||||
[(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tMVN : TI<(ops GPR:$dst, GPR:$src),
|
||||
"mvn $dst, $src",
|
||||
[(set GPR:$dst, (not GPR:$src))]>;
|
||||
|
||||
def tNEG : TI<(ops GPR:$dst, GPR:$src),
|
||||
"neg $dst, $src",
|
||||
[(set GPR:$dst, (ineg GPR:$src))]>;
|
||||
|
||||
def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"orr $dst, $rhs",
|
||||
[(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
|
||||
def tREV : TI<(ops GPR:$dst, GPR:$src),
|
||||
"rev $dst, $src",
|
||||
[(set GPR:$dst, (bswap GPR:$src))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
|
||||
def tREV16 : TI<(ops GPR:$dst, GPR:$src),
|
||||
"rev16 $dst, $src",
|
||||
[(set GPR:$dst,
|
||||
(or (and (srl GPR:$src, 8), 0xFF),
|
||||
(or (and (shl GPR:$src, 8), 0xFF00),
|
||||
(or (and (srl GPR:$src, 8), 0xFF0000),
|
||||
(and (shl GPR:$src, 8), 0xFF000000)))))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
|
||||
def tREVSH : TI<(ops GPR:$dst, GPR:$src),
|
||||
"revsh $dst, $src",
|
||||
[(set GPR:$dst,
|
||||
(sext_inreg
|
||||
(or (srl (and GPR:$src, 0xFFFF), 8),
|
||||
(shl GPR:$src, 8)), i16))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
|
||||
def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"ror $dst, $rhs",
|
||||
[(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"sbc $dst, $rhs",
|
||||
[(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
// TODO: A7-96: STMIA - store multiple.
|
||||
|
||||
def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"sub $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>;
|
||||
|
||||
def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
|
||||
"sub $dst, $rhs",
|
||||
[(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>;
|
||||
|
||||
def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
|
||||
"sub $dst, $lhs, $rhs",
|
||||
[(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>;
|
||||
|
||||
def tSUBspi : TI<(ops GPR:$sp, i32imm:$rhs),
|
||||
"sub $sp, $rhs * 4", []>;
|
||||
|
||||
def tSXTB : TI<(ops GPR:$dst, GPR:$src),
|
||||
"sxtb $dst, $src",
|
||||
[(set GPR:$dst, (sext_inreg GPR:$src, i8))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
def tSXTH : TI<(ops GPR:$dst, GPR:$src),
|
||||
"sxth $dst, $src",
|
||||
[(set GPR:$dst, (sext_inreg GPR:$src, i16))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
|
||||
// TODO: A7-122: TST - test.
|
||||
|
||||
def tUXTB : TI<(ops GPR:$dst, GPR:$src),
|
||||
"uxtb $dst, $src",
|
||||
[(set GPR:$dst, (and GPR:$src, 0xFF))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
def tUXTH : TI<(ops GPR:$dst, GPR:$src),
|
||||
"uxth $dst, $src",
|
||||
[(set GPR:$dst, (and GPR:$src, 0xFFFF))]>,
|
||||
Requires<[IsThumb, HasV6]>;
|
||||
|
||||
|
||||
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
|
||||
// Expanded by the scheduler into a branch sequence.
|
||||
let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
|
||||
def tMOVCCr :
|
||||
PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
|
||||
"@ tMOVCCr $cc",
|
||||
[(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>;
|
||||
|
||||
// tLEApcrel - Load a pc-relative address into a register without offending the
|
||||
// assembler.
|
||||
def tLEApcrel : TI<(ops GPR:$dst, i32imm:$label),
|
||||
!strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
|
||||
"${:private}PCRELL${:uid}+4))\n"),
|
||||
!strconcat("${:private}PCRELL${:uid}:\n\t",
|
||||
"add $dst, pc, #PCRELV${:uid}")),
|
||||
[]>;
|
||||
|
||||
def tLEApcrelCall : TI<(ops GPR:$dst, i32imm:$label),
|
||||
!strconcat(!strconcat(".set PCRELV${:uid}, (${label:call}-(",
|
||||
"${:private}PCRELL${:uid}+4))\n"),
|
||||
!strconcat("${:private}PCRELL${:uid}:\n\t",
|
||||
"add $dst, pc, #PCRELV${:uid}")),
|
||||
[]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Non-Instruction Patterns
|
||||
//
|
||||
|
||||
// ConstantPool, GlobalAddress
|
||||
def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
|
||||
def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
|
||||
def : ThumbPat<(ARMWrapperCall tglobaladdr :$dst),
|
||||
(tLEApcrelCall tglobaladdr :$dst)>;
|
||||
def : ThumbPat<(ARMWrapperCall texternalsym:$dst),
|
||||
(tLEApcrelCall texternalsym:$dst)>;
|
||||
|
||||
// Direct calls
|
||||
def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
|
||||
def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
|
||||
|
||||
// Indirect calls to ARM routines
|
||||
def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
|
||||
|
||||
// zextload i1 -> zextload i8
|
||||
def : ThumbPat<(zextloadi1 t_addrmode_ri5_1:$addr),
|
||||
(tLDRBri t_addrmode_ri5_1:$addr)>;
|
||||
// A zextload of i1 through reg + reg addressing selects the reg + reg byte
// load, whose operand is t_addrmode_rr.
def : ThumbPat<(zextloadi1 t_addrmode_rr:$addr),
               (tLDRBrr t_addrmode_rr:$addr)>;
|
||||
|
||||
// truncstore i1 -> truncstore i8
|
||||
def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_ri5_1:$dst),
|
||||
(tSTRBri GPR:$src, t_addrmode_ri5_1:$dst)>;
|
||||
def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_rr:$dst),
|
||||
(tSTRBrr GPR:$src, t_addrmode_rr:$dst)>;
|
||||
|
||||
// Large immediate handling.
|
||||
|
||||
// Two piece imms.
|
||||
def : ThumbPat<(i32 thumb_immshifted:$src),
|
||||
(tLSLri (tMOVri8 (thumb_immshifted_val imm:$src)),
|
||||
(thumb_immshifted_shamt imm:$src))>;
|
||||
|
||||
def : ThumbPat<(i32 imm0_255_comp:$src),
|
||||
(tMVN (tMOVri8 (imm_comp_XFORM imm:$src)))>;
|
359
lib/Target/ARM/ARMInstrVFP.td
Normal file
359
lib/Target/ARM/ARMInstrVFP.td
Normal file
@ -0,0 +1,359 @@
|
||||
//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Chris Lattner and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes the ARM VFP instruction set.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM VFP Instruction templates.
|
||||
//
|
||||
|
||||
// ARM Float Instruction
// Float instruction template layered directly on AI.
class ASI<dag ops, string asm, list<dag> pattern>
  : AI<ops, asm, pattern> {
  // TODO: Mark the instructions with the appropriate subtarget info.
}

// Float instruction template for memory operations: addressing mode 5,
// four-byte encoding, no index mode, empty constraint string.
class ASI5<dag ops, string asm, list<dag> pattern>
  : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
  // TODO: Mark the instructions with the appropriate subtarget info.
}

// ARM Double Instruction
// Double instruction template layered directly on AI.
class ADI<dag ops, string asm, list<dag> pattern>
  : AI<ops, asm, pattern> {
  // TODO: Mark the instructions with the appropriate subtarget info.
}

// Double instruction template for memory operations: addressing mode 5,
// four-byte encoding, no index mode, empty constraint string.
class ADI5<dag ops, string asm, list<dag> pattern>
  : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
  // TODO: Mark the instructions with the appropriate subtarget info.
}
|
||||
|
||||
// Type profiles for the VFP-specific selection DAG nodes.

// FP -> integer: one result constrained to f32, one floating point operand.
def SDT_FTOI   : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;

// Integer -> FP: one floating point result, one operand constrained to f32.
def SDT_ITOF   : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;

// Single-operand FP compare: no result, one floating point operand.
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;

// FMDRR: an f64 result built from two i32 operands of the same type.
def SDT_FMDRR  : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
                                      SDTCisSameAs<1, 2>]>;

// Target DAG nodes.
def arm_ftoui  : SDNode<"ARMISD::FTOUI",   SDT_FTOI>;
def arm_ftosi  : SDNode<"ARMISD::FTOSI",   SDT_FTOI>;
def arm_sitof  : SDNode<"ARMISD::SITOF",   SDT_ITOF>;
def arm_uitof  : SDNode<"ARMISD::UITOF",   SDT_ITOF>;
// FMSTAT both consumes and produces a flag; the compares only produce one.
def arm_fmstat : SDNode<"ARMISD::FMSTAT",  SDTRet, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp  : SDNode<"ARMISD::CMPFP",   SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
def arm_fmdrr  : SDNode<"ARMISD::FMDRR",   SDT_FMDRR>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load / store Instructions.
|
||||
//
|
||||
|
||||
// Loads of a DPR / SPR register from an addressing mode 5 address.
let isLoad = 1 in
def FLDD : ADI5<(ops DPR:$dst, addrmode5:$addr),
                "fldd $dst, $addr",
                [(set DPR:$dst, (load addrmode5:$addr))]>;

let isLoad = 1 in
def FLDS : ASI5<(ops SPR:$dst, addrmode5:$addr),
                "flds $dst, $addr",
                [(set SPR:$dst, (load addrmode5:$addr))]>;

// Stores of a DPR / SPR register to an addressing mode 5 address.
let isStore = 1 in
def FSTD : ADI5<(ops DPR:$src, addrmode5:$addr),
                "fstd $src, $addr",
                [(store DPR:$src, addrmode5:$addr)]>;

let isStore = 1 in
def FSTS : ASI5<(ops SPR:$src, addrmode5:$addr),
                "fsts $src, $addr",
                [(store SPR:$src, addrmode5:$addr)]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load / store multiple Instructions.
|
||||
//
|
||||
|
||||
// Load multiple. No selection pattern is attached; the register list is
// carried as variable operands and the sub-mode / base register are printed
// from the addrmode5 operand.
let isLoad = 1 in
def FLDMD : ADI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
                 "fldm${addr:submode}d ${addr:base}, $dst1",
                 []>;

let isLoad = 1 in
def FLDMS : ASI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
                 "fldm${addr:submode}s ${addr:base}, $dst1",
                 []>;

// Store multiple; same operand layout as the loads above.
let isStore = 1 in
def FSTMD : ADI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
                 "fstm${addr:submode}d ${addr:base}, $src1",
                 []>;

let isStore = 1 in
def FSTMS : ASI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
                 "fstm${addr:submode}s ${addr:base}, $src1",
                 []>;
|
||||
|
||||
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP Binary Operations.
|
||||
//
|
||||
|
||||
// Addition.
def FADDD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
                 "faddd $dst, $a, $b",
                 [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
def FADDS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
                 "fadds $dst, $a, $b",
                 [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;

// Two-operand compare; matches arm_cmpfp and has no register result.
def FCMPED : ADI<(ops DPR:$a, DPR:$b),
                 "fcmped $a, $b",
                 [(arm_cmpfp DPR:$a, DPR:$b)]>;
def FCMPES : ASI<(ops SPR:$a, SPR:$b),
                 "fcmpes $a, $b",
                 [(arm_cmpfp SPR:$a, SPR:$b)]>;

// Division.
def FDIVD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
                 "fdivd $dst, $a, $b",
                 [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
def FDIVS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
                 "fdivs $dst, $a, $b",
                 [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;

// Multiplication.
def FMULD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
                 "fmuld $dst, $a, $b",
                 [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
def FMULS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
                 "fmuls $dst, $a, $b",
                 [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;

// Negated multiplication: matches fneg applied to an fmul.
def FNMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
                 "fnmuld $dst, $a, $b",
                 [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>;
def FNMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
                 "fnmuls $dst, $a, $b",
                 [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;

// Subtraction.
def FSUBD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
                 "fsubd $dst, $a, $b",
                 [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>;
def FSUBS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
                 "fsubs $dst, $a, $b",
                 [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP Unary Operations.
|
||||
//
|
||||
|
||||
// Absolute value.
def FABSD   : ADI<(ops DPR:$dst, DPR:$a),
                  "fabsd $dst, $a",
                  [(set DPR:$dst, (fabs DPR:$a))]>;
def FABSS   : ASI<(ops SPR:$dst, SPR:$a),
                  "fabss $dst, $a",
                  [(set SPR:$dst, (fabs SPR:$a))]>;

// Single-operand compare; matches arm_cmpfp0.
def FCMPEZD : ADI<(ops DPR:$a),
                  "fcmpezd $a",
                  [(arm_cmpfp0 DPR:$a)]>;
def FCMPEZS : ASI<(ops SPR:$a),
                  "fcmpezs $a",
                  [(arm_cmpfp0 SPR:$a)]>;

// Precision conversions: SPR -> DPR (fextend) and DPR -> SPR (fround).
def FCVTDS  : ADI<(ops DPR:$dst, SPR:$a),
                  "fcvtds $dst, $a",
                  [(set DPR:$dst, (fextend SPR:$a))]>;
def FCVTSD  : ADI<(ops SPR:$dst, DPR:$a),
                  "fcvtsd $dst, $a",
                  [(set SPR:$dst, (fround DPR:$a))]>;

// Register copies; the selection pattern is intentionally commented out.
def FCPYD   : ADI<(ops DPR:$dst, DPR:$a),
                  "fcpyd $dst, $a",
                  [/*(set DPR:$dst, DPR:$a)*/]>;
def FCPYS   : ASI<(ops SPR:$dst, SPR:$a),
                  "fcpys $dst, $a",
                  [/*(set SPR:$dst, SPR:$a)*/]>;

// Negation.
def FNEGD   : ADI<(ops DPR:$dst, DPR:$a),
                  "fnegd $dst, $a",
                  [(set DPR:$dst, (fneg DPR:$a))]>;
def FNEGS   : ASI<(ops SPR:$dst, SPR:$a),
                  "fnegs $dst, $a",
                  [(set SPR:$dst, (fneg SPR:$a))]>;

// Square root.
def FSQRTD  : ADI<(ops DPR:$dst, DPR:$a),
                  "fsqrtd $dst, $a",
                  [(set DPR:$dst, (fsqrt DPR:$a))]>;
def FSQRTS  : ASI<(ops SPR:$dst, SPR:$a),
                  "fsqrts $dst, $a",
                  [(set SPR:$dst, (fsqrt SPR:$a))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP <-> GPR Copies. Int <-> FP Conversions.
|
||||
//
|
||||
|
||||
// Pseudo instructions that give an SPR / DPR register an undefined value;
// they are printed as assembler comments.
def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD),
                                  "@ IMPLICIT_DEF_SPR $rD",
                                  [(set SPR:$rD, (undef))]>;
def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD),
                                  "@ IMPLICIT_DEF_DPR $rD",
                                  [(set DPR:$rD, (undef))]>;

// Bit-for-bit moves between an SPR and a GPR.
def FMRS   : ASI<(ops GPR:$dst, SPR:$src),
                 "fmrs $dst, $src",
                 [(set GPR:$dst, (bitconvert SPR:$src))]>;

def FMSR   : ASI<(ops SPR:$dst, GPR:$src),
                 "fmsr $dst, $src",
                 [(set SPR:$dst, (bitconvert GPR:$src))]>;

// DPR -> two GPRs. Has no selection pattern (see FIXME).
def FMRRD  : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
                 "fmrrd $dst1, $dst2, $src",
                 [/* FIXME: Can't write pattern for multiple result instr*/]>;

// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR

// Two GPRs -> DPR, matched through the arm_fmdrr node.
def FMDRR  : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
                 "fmdrr $dst, $src1, $src2",
                 [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;

// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
// FMRX : SPR system reg -> GPR

// FMSRR: GPR -> SPR

// Operand-less instruction matched by the arm_fmstat node.
def FMSTAT : ASI<(ops), "fmstat", [(arm_fmstat)]>;
|
||||
|
||||
// FMXR: GPR -> VFP System reg
|
||||
|
||||
|
||||
// Int to FP:
// The integer source is held in an SPR register; the result is a DPR
// ("...D") or an SPR ("...S").

def FSITOD  : ADI<(ops DPR:$dst, SPR:$a),
                  "fsitod $dst, $a",
                  [(set DPR:$dst, (arm_sitof SPR:$a))]>;
def FSITOS  : ASI<(ops SPR:$dst, SPR:$a),
                  "fsitos $dst, $a",
                  [(set SPR:$dst, (arm_sitof SPR:$a))]>;

def FUITOD  : ADI<(ops DPR:$dst, SPR:$a),
                  "fuitod $dst, $a",
                  [(set DPR:$dst, (arm_uitof SPR:$a))]>;
def FUITOS  : ASI<(ops SPR:$dst, SPR:$a),
                  "fuitos $dst, $a",
                  [(set SPR:$dst, (arm_uitof SPR:$a))]>;

// FP to Int:
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
// The integer result is produced in an SPR register.

def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
                  "ftosizd $dst, $a",
                  [(set SPR:$dst, (arm_ftosi DPR:$a))]>;
def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
                  "ftosizs $dst, $a",
                  [(set SPR:$dst, (arm_ftosi SPR:$a))]>;

def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
                  "ftouizd $dst, $a",
                  [(set SPR:$dst, (arm_ftoui DPR:$a))]>;
def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
                  "ftouizs $dst, $a",
                  [(set SPR:$dst, (arm_ftoui SPR:$a))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP FMA Operations.
|
||||
//
|
||||
|
||||
// Every instruction in this group reads its accumulator through $dstin,
// which is tied to the result register by the "$dstin = $dst" constraint.

// dst = (a * b) + dstin
def FMACD  : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
                 "fmacd $dst, $a, $b",
                 [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;
def FMACS  : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
                 "fmacs $dst, $a, $b",
                 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;

// dst = (a * b) - dstin
def FMSCD  : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
                 "fmscd $dst, $a, $b",
                 [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;
def FMSCS  : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
                 "fmscs $dst, $a, $b",
                 [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;

// dst = -(a * b) + dstin
def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
                 "fnmacd $dst, $a, $b",
                 [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;
def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
                 "fnmacs $dst, $a, $b",
                 [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;

// dst = -(a * b) - dstin
def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
                 "fnmscd $dst, $a, $b",
                 [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;
def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
                 "fnmscs $dst, $a, $b",
                 [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
             RegConstraint<"$dstin = $dst">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP Conditional moves.
|
||||
//
|
||||
|
||||
// Conditional copy / negate. $false is tied to the result register, so only
// $true and the condition code appear in the assembly string.

// Matched by ARMcmov.
def FCPYDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
                  "fcpyd$cc $dst, $true",
                  [(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))]>,
              RegConstraint<"$false = $dst">;
def FCPYScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
                  "fcpys$cc $dst, $true",
                  [(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))]>,
              RegConstraint<"$false = $dst">;

// Matched by ARMcneg.
def FNEGDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
                  "fnegd$cc $dst, $true",
                  [(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))]>,
              RegConstraint<"$false = $dst">;
def FNEGScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
                  "fnegs$cc $dst, $true",
                  [(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))]>,
              RegConstraint<"$false = $dst">;
|
628
lib/Target/ARM/ARMLoadStoreOptimizer.cpp
Normal file
628
lib/Target/ARM/ARMLoadStoreOptimizer.cpp
Normal file
@ -0,0 +1,628 @@
|
||||
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that performs load / store related peephole
|
||||
// optimizations. This pass should be run after register allocation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm-ldst-opt"
|
||||
#include "ARM.h"
|
||||
#include "ARMAddressingModes.h"
|
||||
#include "ARMRegisterInfo.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Counters for the load / store multiple instructions created by this pass;
// they are bumped in getLoadStoreMultipleOpcode().
STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
|
||||
|
||||
namespace {
|
||||
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &Fn);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM load / store optimization pass";
|
||||
}
|
||||
|
||||
private:
|
||||
struct MemOpQueueEntry {
|
||||
int Offset;
|
||||
unsigned Position;
|
||||
MachineBasicBlock::iterator MBBI;
|
||||
bool Merged;
|
||||
MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
|
||||
: Offset(o), Position(p), MBBI(i), Merged(false) {};
|
||||
};
|
||||
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
|
||||
typedef MemOpQueue::iterator MemOpQueueIter;
|
||||
|
||||
SmallVector<MachineBasicBlock::iterator, 4>
|
||||
MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
|
||||
int Opcode, unsigned Size, MemOpQueue &MemOps);
|
||||
|
||||
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
|
||||
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
|
||||
};
|
||||
}
|
||||
|
||||
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
|
||||
/// optimization pass.
|
||||
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
|
||||
return new ARMLoadStoreOpt();
|
||||
}
|
||||
|
||||
static int getLoadStoreMultipleOpcode(int Opcode) {
|
||||
switch (Opcode) {
|
||||
case ARM::LDR:
|
||||
NumLDMGened++;
|
||||
return ARM::LDM;
|
||||
case ARM::STR:
|
||||
NumSTMGened++;
|
||||
return ARM::STM;
|
||||
case ARM::FLDS:
|
||||
NumFLDMGened++;
|
||||
return ARM::FLDMS;
|
||||
case ARM::FSTS:
|
||||
NumFSTMGened++;
|
||||
return ARM::FSTMS;
|
||||
case ARM::FLDD:
|
||||
NumFLDMGened++;
|
||||
return ARM::FLDMD;
|
||||
case ARM::FSTD:
|
||||
NumFSTMGened++;
|
||||
return ARM::FSTMD;
|
||||
default: abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// mergeOps - Create and insert a LDM or STM with Base as base register and
|
||||
/// registers in Regs as the register operands that would be loaded / stored.
|
||||
/// It returns true if the transformation is done.
|
||||
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
int Offset, unsigned Base, int Opcode,
|
||||
SmallVector<unsigned, 8> &Regs,
|
||||
const TargetInstrInfo *TII) {
|
||||
// Only a single register to load / store. Don't bother.
|
||||
unsigned NumRegs = Regs.size();
|
||||
if (NumRegs <= 1)
|
||||
return false;
|
||||
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
||||
bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
|
||||
if (isAM4 && Offset == 4)
|
||||
Mode = ARM_AM::ib;
|
||||
else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
|
||||
Mode = ARM_AM::da;
|
||||
else if (isAM4 && Offset == -4 * (int)NumRegs)
|
||||
Mode = ARM_AM::db;
|
||||
else if (Offset != 0) {
|
||||
// If starting offset isn't zero, insert a MI to materialize a new base.
|
||||
// But only do so if it is cost effective, i.e. merging more than two
|
||||
// loads / stores.
|
||||
if (NumRegs <= 2)
|
||||
return false;
|
||||
|
||||
unsigned NewBase;
|
||||
if (Opcode == ARM::LDR)
|
||||
// If it is a load, then just use one of the destination register to
|
||||
// use as the new base.
|
||||
NewBase = Regs[NumRegs-1];
|
||||
else {
|
||||
// FIXME: Try scavenging a register to use as a new base.
|
||||
NewBase = ARM::R12;
|
||||
}
|
||||
int BaseOpc = ARM::ADDri;
|
||||
if (Offset < 0) {
|
||||
BaseOpc = ARM::SUBri;
|
||||
Offset = - Offset;
|
||||
}
|
||||
int ImmedOffset = ARM_AM::getSOImmVal(Offset);
|
||||
if (ImmedOffset == -1)
|
||||
return false; // Probably not worth it then.
|
||||
BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase).addReg(Base).addImm(ImmedOffset);
|
||||
Base = NewBase;
|
||||
}
|
||||
|
||||
bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
|
||||
bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
|
||||
Opcode = getLoadStoreMultipleOpcode(Opcode);
|
||||
MachineInstrBuilder MIB = (isAM4)
|
||||
? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
|
||||
.addImm(ARM_AM::getAM4ModeImm(Mode))
|
||||
: BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
|
||||
.addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs));
|
||||
for (unsigned i = 0; i != NumRegs; ++i)
|
||||
MIB = MIB.addReg(Regs[i], Opcode == isDef);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// MergeLDR_STR - Starting at queue index SIndex, gather the longest run of
/// queued memory ops that can go into one load / store multiple, try to merge
/// that run with mergeOps, and then recurse on whatever follows the run.
/// Returns iterators to every load / store multiple that was created.
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB,
                              unsigned SIndex, unsigned Base, int Opcode,
                              unsigned Size, MemOpQueue &MemOps) {
  const bool UsesAM4 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  const unsigned NumOps = MemOps.size();
  const int StartOffset = MemOps[SIndex].Offset;

  SmallVector<MachineBasicBlock::iterator, 4> Result;
  SmallVector<unsigned, 8> RegList;

  // State of the run collected so far.
  int LastOffset = StartOffset;
  unsigned LatestPos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator InsertPt = MemOps[SIndex].MBBI;

  unsigned FirstReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned LastRegNum = ARMRegisterInfo::getRegisterNumbering(FirstReg);
  RegList.push_back(FirstReg);

  for (unsigned Idx = SIndex + 1; Idx != NumOps; ++Idx) {
    int NextOffset = MemOps[Idx].Offset;
    unsigned Reg = MemOps[Idx].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);

    // The next op must sit directly after the previous one in memory, and:
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    bool OffsetFollows = NextOffset == LastOffset + (int)Size;
    bool RegFollows =
      (UsesAM4 && RegNum > LastRegNum) || RegNum == LastRegNum + 1;

    if (!OffsetFollows || !RegFollows) {
      // Can't merge this in. Try merge the earlier ones first; the new
      // instruction goes right after the latest op of the run.
      if (mergeOps(MBB, ++InsertPt, StartOffset, Base, Opcode, RegList, TII)) {
        Result.push_back(prior(InsertPt));
        for (unsigned Done = SIndex; Done < Idx; ++Done) {
          MBB.erase(MemOps[Done].MBBI);
          MemOps[Done].Merged = true;
        }
      }
      // Then start a fresh run at the op that broke this one.
      SmallVector<MachineBasicBlock::iterator, 4> Rest =
        MergeLDR_STR(MBB, Idx, Base, Opcode, Size, MemOps);
      Result.append(Rest.begin(), Rest.end());
      return Result;
    }

    // Extend the run with this op.
    LastOffset += Size;
    RegList.push_back(Reg);
    LastRegNum = RegNum;

    // Keep the insertion point at the op with the highest position.
    if (MemOps[Idx].Position > LatestPos) {
      LatestPos = MemOps[Idx].Position;
      InsertPt = MemOps[Idx].MBBI;
    }
  }

  // The run reached the end of the queue; merge what was collected.
  if (mergeOps(MBB, ++InsertPt, StartOffset, Base, Opcode, RegList, TII)) {
    Result.push_back(prior(InsertPt));
    for (unsigned Done = SIndex; Done != NumOps; ++Done) {
      MBB.erase(MemOps[Done].MBBI);
      MemOps[Done].Merged = true;
    }
  }

  return Result;
}
|
||||
|
||||
/// isMatchingDecrement - True if MI is non-null and is a SUBri whose
/// destination and first source are both Base and whose immediate, decoded
/// with ARM_AM::getAM2Offset, equals Bytes.
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes) {
  if (!MI || MI->getOpcode() != ARM::SUBri)
    return false;
  if (MI->getOperand(0).getReg() != Base)
    return false;
  if (MI->getOperand(1).getReg() != Base)
    return false;
  return ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes;
}
|
||||
|
||||
/// isMatchingIncrement - True if MI is non-null and is an ADDri whose
/// destination and first source are both Base and whose immediate, decoded
/// with ARM_AM::getAM2Offset, equals Bytes.
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes) {
  if (!MI || MI->getOpcode() != ARM::ADDri)
    return false;
  if (MI->getOperand(0).getReg() != Base)
    return false;
  if (MI->getOperand(1).getReg() != Base)
    return false;
  return ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes;
}
|
||||
|
||||
/// getLSMultipleTransferSize - Number of bytes transferred by the load /
/// store (multiple) instruction MI, or 0 for any other opcode.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  unsigned NumBytes = 0;
  switch (MI->getOpcode()) {
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    NumBytes = 4;
    break;
  case ARM::FLDD:
  case ARM::FSTD:
    NumBytes = 8;
    break;
  case ARM::LDM:
  case ARM::STM:
    // Every operand after the first two is counted as one 4-byte register.
    NumBytes = (MI->getNumOperands() - 2) * 4;
    break;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    // The AM5 offset field is scaled by 4 to get bytes.
    NumBytes = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
    break;
  default:
    break;
  }
  return NumBytes;
}
|
||||
|
||||
/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
|
||||
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
|
||||
///
|
||||
/// stmia rn, <ra, rb, rc>
|
||||
/// rn := rn + 4 * 3;
|
||||
/// =>
|
||||
/// stmia rn!, <ra, rb, rc>
|
||||
///
|
||||
/// rn := rn - 4 * 3;
|
||||
/// ldmia rn, <ra, rb, rc>
|
||||
/// =>
|
||||
/// ldmdb rn!, <ra, rb, rc>
|
||||
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI) {
|
||||
MachineInstr *MI = MBBI;
|
||||
unsigned Base = MI->getOperand(0).getReg();
|
||||
unsigned Bytes = getLSMultipleTransferSize(MI);
|
||||
int Opcode = MI->getOpcode();
|
||||
bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
|
||||
|
||||
if (isAM4) {
|
||||
if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
|
||||
return false;
|
||||
|
||||
// Can't use the updating AM4 sub-mode if the base register is also a dest
|
||||
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
|
||||
for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) {
|
||||
if (MI->getOperand(i).getReg() == Base)
|
||||
return false;
|
||||
}
|
||||
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
|
||||
if (MBBI != MBB.begin()) {
|
||||
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
|
||||
MBB.erase(PrevMBBI);
|
||||
return true;
|
||||
} else if (Mode == ARM_AM::ib &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
|
||||
MBB.erase(PrevMBBI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (MBBI != MBB.end()) {
|
||||
MachineBasicBlock::iterator NextMBBI = next(MBBI);
|
||||
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
|
||||
MBB.erase(NextMBBI);
|
||||
return true;
|
||||
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
|
||||
isMatchingDecrement(NextMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
|
||||
MBB.erase(NextMBBI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
|
||||
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
|
||||
return false;
|
||||
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
|
||||
unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
|
||||
if (MBBI != MBB.begin()) {
|
||||
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
|
||||
MBB.erase(PrevMBBI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (MBBI != MBB.end()) {
|
||||
MachineBasicBlock::iterator NextMBBI = next(MBBI);
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes)) {
|
||||
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
|
||||
MBB.erase(NextMBBI);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case ARM::LDR: return ARM::LDR_PRE;
|
||||
case ARM::STR: return ARM::STR_PRE;
|
||||
case ARM::FLDS: return ARM::FLDMS;
|
||||
case ARM::FLDD: return ARM::FLDMD;
|
||||
case ARM::FSTS: return ARM::FSTMS;
|
||||
case ARM::FSTD: return ARM::FSTMD;
|
||||
default: abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case ARM::LDR: return ARM::LDR_POST;
|
||||
case ARM::STR: return ARM::STR_POST;
|
||||
case ARM::FLDS: return ARM::FLDMS;
|
||||
case ARM::FLDD: return ARM::FLDMD;
|
||||
case ARM::FSTS: return ARM::FSTMS;
|
||||
case ARM::FSTD: return ARM::FSTMD;
|
||||
default: abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
|
||||
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
|
||||
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const TargetInstrInfo *TII) {
|
||||
MachineInstr *MI = MBBI;
|
||||
unsigned Base = MI->getOperand(1).getReg();
|
||||
unsigned Bytes = getLSMultipleTransferSize(MI);
|
||||
int Opcode = MI->getOpcode();
|
||||
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
|
||||
if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
|
||||
(!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
|
||||
return false;
|
||||
|
||||
bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
|
||||
// Can't do the merge if the destination register is the same as the would-be
|
||||
// writeback register.
|
||||
if (isLd && MI->getOperand(0).getReg() == Base)
|
||||
return false;
|
||||
|
||||
bool DoMerge = false;
|
||||
ARM_AM::AddrOpc AddSub = ARM_AM::add;
|
||||
unsigned NewOpc = 0;
|
||||
if (MBBI != MBB.begin()) {
|
||||
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
|
||||
if (isMatchingDecrement(PrevMBBI, Base, Bytes)) {
|
||||
DoMerge = true;
|
||||
AddSub = ARM_AM::sub;
|
||||
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
|
||||
} else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) {
|
||||
DoMerge = true;
|
||||
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
|
||||
}
|
||||
if (DoMerge)
|
||||
MBB.erase(PrevMBBI);
|
||||
}
|
||||
|
||||
if (!DoMerge && MBBI != MBB.end()) {
|
||||
MachineBasicBlock::iterator NextMBBI = next(MBBI);
|
||||
if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) {
|
||||
DoMerge = true;
|
||||
AddSub = ARM_AM::sub;
|
||||
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
|
||||
} else if (isMatchingIncrement(NextMBBI, Base, Bytes)) {
|
||||
DoMerge = true;
|
||||
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
|
||||
}
|
||||
if (DoMerge)
|
||||
MBB.erase(NextMBBI);
|
||||
}
|
||||
|
||||
if (!DoMerge)
|
||||
return false;
|
||||
|
||||
bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
|
||||
unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
|
||||
: ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
|
||||
true, isDPR ? 2 : 1);
|
||||
if (isLd) {
|
||||
if (isAM2)
|
||||
BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
|
||||
.addReg(Base, true).addReg(Base).addReg(0).addImm(Offset);
|
||||
else
|
||||
BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
|
||||
.addImm(Offset).addReg(MI->getOperand(0).getReg(), true);
|
||||
} else {
|
||||
if (isAM2)
|
||||
BuildMI(MBB, MBBI, TII->get(NewOpc), Base).addReg(MI->getOperand(0).getReg())
|
||||
.addReg(Base).addReg(0).addImm(Offset);
|
||||
else
|
||||
BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
|
||||
.addImm(Offset).addReg(MI->getOperand(0).getReg(), false);
|
||||
}
|
||||
MBB.erase(MBBI);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
|
||||
/// ops of the same base and incrementing offset into LDM / STM ops.
|
||||
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  // Scan the block once, gathering runs ("chains") of loads / stores that use
  // the same opcode and the same base register.  MemOps holds the current
  // chain, kept in increasing offset order.  When a chain ends it is handed to
  // MergeLDR_STR, and afterwards base register inc/dec instructions are folded
  // into whatever was produced.  Returns true if at least one merge happened.
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;   // Base register of the current chain, 0 == none.
  int CurrOpc = -1;        // Opcode of the current chain, -1 == none.
  unsigned CurrSize = 0;   // Access size recorded when the chain was started.
  unsigned Position = 0;   // Running index of the instruction being visited.

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;   // Move on to the next instruction this iteration.
    bool TryMerge = false;  // The current chain is finished; try to merge it.
    bool Clobber = false;   // This load overwrites its own base register.

    int Opcode = MBBI->getOpcode();
    bool isMemOp = false;
    bool isAM2 = false;
    unsigned Size = 4;
    switch (Opcode) {
    case ARM::LDR:
    case ARM::STR:
      // Operand 1 must be a register and operand 2 must be register 0
      // (presumably "no offset register" -- confirm against the AM2 operand
      // layout).
      isMemOp =
        (MBBI->getOperand(1).isRegister() && MBBI->getOperand(2).getReg() == 0);
      isAM2 = true;
      break;
    case ARM::FLDS:
    case ARM::FSTS:
      isMemOp = MBBI->getOperand(1).isRegister();
      break;
    case ARM::FLDD:
    case ARM::FSTD:
      isMemOp = MBBI->getOperand(1).isRegister();
      Size = 8;
      break;
    }

    if (isMemOp) {
      unsigned Base = MBBI->getOperand(1).getReg();
      // The encoded offset field is always the last operand.
      unsigned OffIdx = MBBI->getNumOperands()-1;
      unsigned OffField = MBBI->getOperand(OffIdx).getImm();
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }

      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base) {
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            // Insert in front of the first entry with a larger offset so the
            // queue stays ordered by offset.
            for (MemOpQueueIter I = MemOps.begin(), IE = MemOps.end();
                 I != IE; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      // Reached the end of the block: nothing else will terminate the chain,
      // so merge what has been queued now instead of silently dropping it.
      if (MBBI == E)
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,MemOps);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
            NumMerges++;
        NumMerges += MBBII.size();
      }

      // Try folding preceding/trailing base inc/dec into those load/store
      // that were not merged to form LDM/STM ops.
      for (unsigned i = 0; i != NumMemOps; ++i)
        if (!MemOps[i].Merged)
          if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
            NumMerges++;

      // Reset the chain state.
      CurrBase = 0;
      CurrOpc = -1;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
|
||||
|
||||
/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
|
||||
/// (bx lr) into the preceeding stack restore so it directly restore the value
|
||||
/// of LR into pc.
|
||||
/// ldmfd sp!, {r7, lr}
|
||||
/// bx lr
|
||||
/// =>
|
||||
/// ldmfd sp!, {r7, pc}
|
||||
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
|
||||
if (MBB.empty()) return false;
|
||||
|
||||
MachineBasicBlock::iterator MBBI = prior(MBB.end());
|
||||
if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
|
||||
MachineInstr *PrevMI = prior(MBBI);
|
||||
if (PrevMI->getOpcode() == ARM::LDM) {
|
||||
MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
|
||||
if (MO.getReg() == ARM::LR) {
|
||||
PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
|
||||
MO.setReg(ARM::PC);
|
||||
MBB.erase(MBBI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// runOnMachineFunction - Cache the target instruction info, then run both
/// peepholes over every basic block of Fn.  Returns true if any block changed.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TII = Fn.getTarget().getInstrInfo();

  bool Changed = false;
  MachineFunction::iterator BBI = Fn.begin();
  MachineFunction::iterator BBEnd = Fn.end();
  while (BBI != BBEnd) {
    MachineBasicBlock &Block = *BBI;
    // Both transformations always run; neither is skipped when the other one
    // has already modified the block.
    if (LoadStoreMultipleOpti(Block))
      Changed = true;
    if (MergeReturnIntoLDM(Block))
      Changed = true;
    ++BBI;
  }
  return Changed;
}
|
136
lib/Target/ARM/ARMMachineFunctionInfo.h
Normal file
136
lib/Target/ARM/ARMMachineFunctionInfo.h
Normal file
@ -0,0 +1,136 @@
|
||||
//====- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under
|
||||
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares ARM-specific per-machine-function information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARMMACHINEFUNCTIONINFO_H
|
||||
#define ARMMACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "ARMSubtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ARMFunctionInfo - This class is derived from MachineFunction private
|
||||
/// ARM target-specific information for each MachineFunction.
|
||||
class ARMFunctionInfo : public MachineFunctionInfo {
|
||||
|
||||
/// isThumb - True if this function is compiled under Thumb mode.
|
||||
///
|
||||
bool isThumb;
|
||||
|
||||
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
|
||||
///
|
||||
unsigned VarArgsRegSaveSize;
|
||||
|
||||
/// FramePtrSpilled - True if FP register is spilled. Set by
|
||||
/// processFunctionBeforeCalleeSavedScan().
|
||||
bool FramePtrSpilled;
|
||||
|
||||
/// FramePtrSpillOffset - If FramePtrSpilled, this records the frame pointer
|
||||
/// spill stack offset.
|
||||
unsigned FramePtrSpillOffset;
|
||||
|
||||
/// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
|
||||
/// register spills areas. For Mac OS X:
|
||||
///
|
||||
/// GPR callee-saved (1) : r4, r5, r6, r7, lr
|
||||
/// --------------------------------------------
|
||||
/// GPR callee-saved (2) : r8, r10, r11
|
||||
/// --------------------------------------------
|
||||
/// DPR callee-saved : d8 - d15
|
||||
unsigned GPRCS1Offset;
|
||||
unsigned GPRCS2Offset;
|
||||
unsigned DPRCSOffset;
|
||||
|
||||
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
|
||||
/// areas.
|
||||
unsigned GPRCS1Size;
|
||||
unsigned GPRCS2Size;
|
||||
unsigned DPRCSSize;
|
||||
|
||||
/// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
|
||||
/// which belong to these spill areas.
|
||||
std::set<int> GPRCS1Frames;
|
||||
std::set<int> GPRCS2Frames;
|
||||
std::set<int> DPRCSFrames;
|
||||
|
||||
/// JumpTableUId - Unique id for jumptables.
|
||||
///
|
||||
unsigned JumpTableUId;
|
||||
|
||||
public:
|
||||
ARMFunctionInfo() :
|
||||
isThumb(false),
|
||||
VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
|
||||
GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
||||
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
|
||||
|
||||
ARMFunctionInfo(MachineFunction &MF) :
|
||||
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
|
||||
VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
|
||||
GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
||||
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
|
||||
|
||||
bool isThumbFunction() const { return isThumb; }
|
||||
|
||||
unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
|
||||
void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
|
||||
|
||||
bool isFramePtrSpilled() const { return FramePtrSpilled; }
|
||||
void setFramePtrSpilled(bool s) { FramePtrSpilled = s; }
|
||||
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
|
||||
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
|
||||
|
||||
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
|
||||
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
|
||||
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
|
||||
|
||||
void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
|
||||
void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
|
||||
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
|
||||
|
||||
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
|
||||
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
|
||||
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
|
||||
|
||||
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
|
||||
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
|
||||
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
|
||||
|
||||
bool isGPRCalleeSavedArea1Frame(unsigned fi) const {
|
||||
return GPRCS1Frames.count(fi);
|
||||
}
|
||||
bool isGPRCalleeSavedArea2Frame(unsigned fi) const {
|
||||
return GPRCS2Frames.count(fi);
|
||||
}
|
||||
bool isDPRCalleeSavedAreaFrame(unsigned fi) const {
|
||||
return DPRCSFrames.count(fi);
|
||||
}
|
||||
|
||||
void addGPRCalleeSavedArea1Frame(unsigned fi) {
|
||||
GPRCS1Frames.insert(fi);
|
||||
}
|
||||
void addGPRCalleeSavedArea2Frame(unsigned fi) {
|
||||
GPRCS2Frames.insert(fi);
|
||||
}
|
||||
void addDPRCalleeSavedAreaFrame(unsigned fi) {
|
||||
DPRCSFrames.insert(fi);
|
||||
}
|
||||
|
||||
unsigned createJumpTableUId() {
|
||||
return JumpTableUId++;
|
||||
}
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // ARMMACHINEFUNCTIONINFO_H
|
@ -1,75 +0,0 @@
|
||||
//===-- ARMMul.cpp - Define TargetMachine for A5CRM -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by the "Instituto Nokia de Tecnologia" and
|
||||
// is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Modify the ARM multiplication instructions so that Rd{Hi,Lo} and Rm are distinct
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "ARM.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class VISIBILITY_HIDDEN FixMul : public MachineFunctionPass {
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
};
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createARMFixMulPass() { return new FixMul(); }
|
||||
|
||||
bool FixMul::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool Changed = false;
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
|
||||
BB != E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr *MI = I;
|
||||
|
||||
int Op = MI->getOpcode();
|
||||
if (Op == ARM::MUL ||
|
||||
Op == ARM::SMULL ||
|
||||
Op == ARM::UMULL) {
|
||||
MachineOperand &RdOp = MI->getOperand(0);
|
||||
MachineOperand &RmOp = MI->getOperand(1);
|
||||
MachineOperand &RsOp = MI->getOperand(2);
|
||||
|
||||
unsigned Rd = RdOp.getReg();
|
||||
unsigned Rm = RmOp.getReg();
|
||||
unsigned Rs = RsOp.getReg();
|
||||
|
||||
if (Rd == Rm) {
|
||||
Changed = true;
|
||||
if (Rd != Rs) {
|
||||
//Rd and Rm must be distinct, but Rd can be equal to Rs.
|
||||
//Swap Rs and Rm
|
||||
RmOp.setReg(Rs);
|
||||
RsOp.setReg(Rm);
|
||||
} else {
|
||||
unsigned scratch = Op == ARM::MUL ? ARM::R12 : ARM::R0;
|
||||
BuildMI(MBB, I, MF.getTarget().getInstrInfo()->get(ARM::MOV),
|
||||
scratch).addReg(Rm).addImm(0).addImm(ARMShift::LSL);
|
||||
RmOp.setReg(scratch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -17,18 +17,36 @@
|
||||
|
||||
#include "llvm/Target/MRegisterInfo.h"
|
||||
#include "ARMGenRegisterInfo.h.inc"
|
||||
#include <set>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Type;
|
||||
class TargetInstrInfo;
|
||||
class TargetInstrInfo;
|
||||
class ARMSubtarget;
|
||||
class Type;
|
||||
|
||||
struct ARMRegisterInfo : public ARMGenRegisterInfo {
|
||||
const TargetInstrInfo &TII;
|
||||
const ARMSubtarget &STI;
|
||||
private:
|
||||
/// FramePtr - ARM physical register used as frame ptr.
|
||||
unsigned FramePtr;
|
||||
|
||||
ARMRegisterInfo(const TargetInstrInfo &tii);
|
||||
public:
|
||||
ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
|
||||
|
||||
/// getRegisterNumbering - Given the enum value for some register, e.g.
|
||||
/// ARM::LR, return the number that it corresponds to (e.g. 14).
|
||||
static unsigned getRegisterNumbering(unsigned RegEnum);
|
||||
|
||||
/// Code Generation virtual methods...
|
||||
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI) const;
|
||||
|
||||
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI) const;
|
||||
|
||||
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned SrcReg, int FrameIndex,
|
||||
@ -43,9 +61,8 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
const TargetRegisterClass *RC) const;
|
||||
|
||||
virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
|
||||
unsigned OpNum,
|
||||
int FrameIndex) const;
|
||||
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
|
||||
int FrameIndex) const;
|
||||
|
||||
const unsigned *getCalleeSavedRegs() const;
|
||||
|
||||
@ -57,7 +74,7 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
|
||||
|
||||
void eliminateFrameIndex(MachineBasicBlock::iterator II) const;
|
||||
|
||||
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
|
||||
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const;
|
||||
|
||||
void emitPrologue(MachineFunction &MF) const;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===- ARMRegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
|
||||
//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -13,129 +13,169 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Registers are identified with 4-bit ID numbers.
|
||||
class ARMReg<string n> : Register<n> {
|
||||
let Namespace = "ARM";
|
||||
}
|
||||
|
||||
// Ri - 32-bit integer registers
|
||||
class Ri<bits<4> num, string n> : ARMReg<n> {
|
||||
class ARMReg<bits<4> num, string n, list<Register> aliases = []> : Register<n> {
|
||||
field bits<4> Num;
|
||||
let Num = num;
|
||||
}
|
||||
// Rf - 32-bit floating-point registers
|
||||
class Rf<bits<5> num, string n> : ARMReg<n> {
|
||||
field bits<5> Num;
|
||||
let Num = num;
|
||||
}
|
||||
// Rd - Slots in the FP register file for 64-bit floating-point values.
|
||||
class Rd<bits<5> num, string n, list<Register> aliases> : ARMReg<n> {
|
||||
field bits<5> Num;
|
||||
let Num = num;
|
||||
let Namespace = "ARM";
|
||||
let Aliases = aliases;
|
||||
}
|
||||
|
||||
class ARMFReg<bits<5> num, string n> : Register<n> {
|
||||
field bits<5> Num;
|
||||
let Namespace = "ARM";
|
||||
}
|
||||
|
||||
// Integer registers
|
||||
def R0 : Ri< 0, "R0">, DwarfRegNum<0>;
|
||||
def R1 : Ri< 1, "R1">, DwarfRegNum<1>;
|
||||
def R2 : Ri< 2, "R2">, DwarfRegNum<2>;
|
||||
def R3 : Ri< 3, "R3">, DwarfRegNum<3>;
|
||||
def R4 : Ri< 4, "R4">, DwarfRegNum<4>;
|
||||
def R5 : Ri< 5, "R5">, DwarfRegNum<5>;
|
||||
def R6 : Ri< 6, "R6">, DwarfRegNum<6>;
|
||||
def R7 : Ri< 7, "R7">, DwarfRegNum<7>;
|
||||
def R8 : Ri< 8, "R8">, DwarfRegNum<8>;
|
||||
def R9 : Ri< 9, "R9">, DwarfRegNum<9>;
|
||||
def R10 : Ri<10, "R10">, DwarfRegNum<10>;
|
||||
def R11 : Ri<11, "R11">, DwarfRegNum<11>;
|
||||
def R12 : Ri<12, "R12">, DwarfRegNum<12>;
|
||||
def R13 : Ri<13, "R13">, DwarfRegNum<13>;
|
||||
def R14 : Ri<14, "R14">, DwarfRegNum<14>;
|
||||
def R15 : Ri<15, "R15">, DwarfRegNum<15>;
|
||||
def R0 : ARMReg< 0, "r0">, DwarfRegNum<0>;
|
||||
def R1 : ARMReg< 1, "r1">, DwarfRegNum<1>;
|
||||
def R2 : ARMReg< 2, "r2">, DwarfRegNum<2>;
|
||||
def R3 : ARMReg< 3, "r3">, DwarfRegNum<3>;
|
||||
def R4 : ARMReg< 4, "r4">, DwarfRegNum<4>;
|
||||
def R5 : ARMReg< 5, "r5">, DwarfRegNum<5>;
|
||||
def R6 : ARMReg< 6, "r6">, DwarfRegNum<6>;
|
||||
def R7 : ARMReg< 7, "r7">, DwarfRegNum<7>;
|
||||
def R8 : ARMReg< 8, "r8">, DwarfRegNum<8>;
|
||||
def R9 : ARMReg< 9, "r9">, DwarfRegNum<9>;
|
||||
def R10 : ARMReg<10, "r10">, DwarfRegNum<10>;
|
||||
def R11 : ARMReg<11, "r11">, DwarfRegNum<11>;
|
||||
def R12 : ARMReg<12, "r12">, DwarfRegNum<12>;
|
||||
def SP : ARMReg<13, "sp">, DwarfRegNum<13>;
|
||||
def LR : ARMReg<14, "lr">, DwarfRegNum<14>;
|
||||
def PC : ARMReg<15, "pc">, DwarfRegNum<15>;
|
||||
|
||||
// TODO: update to VFP-v3
|
||||
// Floating-point registers
|
||||
def S0 : Rf< 0, "S0">, DwarfRegNum<64>;
|
||||
def S1 : Rf< 1, "S1">, DwarfRegNum<65>;
|
||||
def S2 : Rf< 2, "S2">, DwarfRegNum<66>;
|
||||
def S3 : Rf< 3, "S3">, DwarfRegNum<67>;
|
||||
def S4 : Rf< 4, "S4">, DwarfRegNum<68>;
|
||||
def S5 : Rf< 5, "S5">, DwarfRegNum<69>;
|
||||
def S6 : Rf< 6, "S6">, DwarfRegNum<70>;
|
||||
def S7 : Rf< 7, "S7">, DwarfRegNum<71>;
|
||||
def S8 : Rf< 8, "S8">, DwarfRegNum<72>;
|
||||
def S9 : Rf< 9, "S9">, DwarfRegNum<73>;
|
||||
def S10 : Rf<10, "S10">, DwarfRegNum<74>;
|
||||
def S11 : Rf<11, "S11">, DwarfRegNum<75>;
|
||||
def S12 : Rf<12, "S12">, DwarfRegNum<76>;
|
||||
def S13 : Rf<13, "S13">, DwarfRegNum<77>;
|
||||
def S14 : Rf<14, "S14">, DwarfRegNum<78>;
|
||||
def S15 : Rf<15, "S15">, DwarfRegNum<79>;
|
||||
def S16 : Rf<16, "S16">, DwarfRegNum<80>;
|
||||
def S17 : Rf<17, "S17">, DwarfRegNum<81>;
|
||||
def S18 : Rf<18, "S18">, DwarfRegNum<82>;
|
||||
def S19 : Rf<19, "S19">, DwarfRegNum<83>;
|
||||
def S20 : Rf<20, "S20">, DwarfRegNum<84>;
|
||||
def S21 : Rf<21, "S21">, DwarfRegNum<85>;
|
||||
def S22 : Rf<22, "S22">, DwarfRegNum<86>;
|
||||
def S23 : Rf<23, "S23">, DwarfRegNum<87>;
|
||||
def S24 : Rf<24, "S24">, DwarfRegNum<88>;
|
||||
def S25 : Rf<25, "S25">, DwarfRegNum<89>;
|
||||
def S26 : Rf<26, "S26">, DwarfRegNum<90>;
|
||||
def S27 : Rf<27, "S27">, DwarfRegNum<91>;
|
||||
def S28 : Rf<28, "S28">, DwarfRegNum<92>;
|
||||
def S29 : Rf<29, "S29">, DwarfRegNum<93>;
|
||||
def S30 : Rf<30, "S30">, DwarfRegNum<94>;
|
||||
def S31 : Rf<31, "S31">, DwarfRegNum<95>;
|
||||
// Float registers
|
||||
def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
|
||||
def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
|
||||
def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
|
||||
def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
|
||||
def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
|
||||
def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
|
||||
def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
|
||||
def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
|
||||
def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
|
||||
def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
|
||||
def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
|
||||
def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
|
||||
def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
|
||||
def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
|
||||
def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
|
||||
def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
|
||||
|
||||
// Aliases of the S* registers used to hold 64-bit fp values (doubles)
|
||||
def D0 : Rd< 0, "D0", [S0, S1]>, DwarfRegNum<64>;
|
||||
def D1 : Rd< 2, "D1", [S2, S3]>, DwarfRegNum<66>;
|
||||
def D2 : Rd< 4, "D2", [S4, S5]>, DwarfRegNum<68>;
|
||||
def D3 : Rd< 6, "D3", [S6, S7]>, DwarfRegNum<70>;
|
||||
def D4 : Rd< 8, "D4", [S8, S9]>, DwarfRegNum<72>;
|
||||
def D5 : Rd<10, "D5", [S10, S11]>, DwarfRegNum<74>;
|
||||
def D6 : Rd<12, "D6", [S12, S13]>, DwarfRegNum<76>;
|
||||
def D7 : Rd<14, "D7", [S14, S15]>, DwarfRegNum<78>;
|
||||
def D8 : Rd<16, "D8", [S16, S17]>, DwarfRegNum<80>;
|
||||
def D9 : Rd<18, "D9", [S18, S19]>, DwarfRegNum<82>;
|
||||
def D10 : Rd<20, "D10", [S20, S21]>, DwarfRegNum<84>;
|
||||
def D11 : Rd<22, "D11", [S22, S23]>, DwarfRegNum<86>;
|
||||
def D12 : Rd<24, "D12", [S24, S25]>, DwarfRegNum<88>;
|
||||
def D13 : Rd<26, "D13", [S26, S27]>, DwarfRegNum<90>;
|
||||
def D14 : Rd<28, "D14", [S28, S29]>, DwarfRegNum<92>;
|
||||
def D15 : Rd<30, "D15", [S30, S31]>, DwarfRegNum<94>;
|
||||
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
|
||||
def D0 : ARMReg< 0, "d0", [S0, S1]>;
|
||||
def D1 : ARMReg< 1, "d1", [S2, S3]>;
|
||||
def D2 : ARMReg< 2, "d2", [S4, S5]>;
|
||||
def D3 : ARMReg< 3, "d3", [S6, S7]>;
|
||||
def D4 : ARMReg< 4, "d4", [S8, S9]>;
|
||||
def D5 : ARMReg< 5, "d5", [S10, S11]>;
|
||||
def D6 : ARMReg< 6, "d6", [S12, S13]>;
|
||||
def D7 : ARMReg< 7, "d7", [S14, S15]>;
|
||||
def D8 : ARMReg< 8, "d8", [S16, S17]>;
|
||||
def D9 : ARMReg< 9, "d9", [S18, S19]>;
|
||||
def D10 : ARMReg<10, "d10", [S20, S21]>;
|
||||
def D11 : ARMReg<11, "d11", [S22, S23]>;
|
||||
def D12 : ARMReg<12, "d12", [S24, S25]>;
|
||||
def D13 : ARMReg<13, "d13", [S26, S27]>;
|
||||
def D14 : ARMReg<14, "d14", [S28, S29]>;
|
||||
def D15 : ARMReg<15, "d15", [S30, S31]>;
|
||||
|
||||
// Register classes.
|
||||
//
|
||||
// FIXME: the register order should be defined in terms of the preferred
|
||||
// allocation order...
|
||||
// pc == Program Counter
|
||||
// lr == Link Register
|
||||
// sp == Stack Pointer
|
||||
// r12 == ip (scratch)
|
||||
// r7 == Frame Pointer (thumb-style backtraces)
|
||||
// r11 == Frame Pointer (arm-style backtraces)
|
||||
// r10 == Stack Limit
|
||||
//
|
||||
def IntRegs : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
|
||||
R7, R8, R9, R10, R11, R12,
|
||||
R13, R14, R15]> {
|
||||
def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
|
||||
R7, R8, R9, R10, R12, R11,
|
||||
LR, SP, PC]> {
|
||||
let MethodProtos = [{
|
||||
iterator allocation_order_begin(const MachineFunction &MF) const;
|
||||
iterator allocation_order_end(const MachineFunction &MF) const;
|
||||
}];
|
||||
// FIXME: We are reserving r12 in case the PEI needs to use it to
|
||||
// generate large stack offset. Make it available once we have register
|
||||
// scavenging.
|
||||
let MethodBodies = [{
|
||||
IntRegsClass::iterator
|
||||
IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
|
||||
// r15 == Program Counter
|
||||
// r14 == Link Register
|
||||
// r13 == Stack Pointer
|
||||
// r12 == ip (scratch)
|
||||
// r11 == Frame Pointer
|
||||
// r10 == Stack Limit
|
||||
if (hasFP(MF))
|
||||
return end() - 5;
|
||||
else
|
||||
return end() - 4;
|
||||
// FP is R11, R9 is available.
|
||||
static const unsigned ARM_GPR_AO_1[] = {
|
||||
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
|
||||
ARM::R8, ARM::R9, ARM::R10,
|
||||
ARM::LR, ARM::R11 };
|
||||
// FP is R11, R9 is not available.
|
||||
static const unsigned ARM_GPR_AO_2[] = {
|
||||
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
|
||||
ARM::R8, ARM::R10,
|
||||
ARM::LR, ARM::R11 };
|
||||
// FP is R7, R9 is available.
|
||||
static const unsigned ARM_GPR_AO_3[] = {
|
||||
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R8,
|
||||
ARM::R9, ARM::R10,ARM::R11,
|
||||
ARM::LR, ARM::R7 };
|
||||
// FP is R7, R9 is not available.
|
||||
static const unsigned ARM_GPR_AO_4[] = {
|
||||
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R8,
|
||||
ARM::R10,ARM::R11,
|
||||
ARM::LR, ARM::R7 };
|
||||
// FP is R7, only low registers available.
|
||||
static const unsigned THUMB_GPR_AO[] = {
|
||||
ARM::R0, ARM::R1, ARM::R2,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
|
||||
|
||||
GPRClass::iterator
|
||||
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
|
||||
const TargetMachine &TM = MF.getTarget();
|
||||
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
|
||||
if (Subtarget.isThumb())
|
||||
return THUMB_GPR_AO;
|
||||
if (Subtarget.useThumbBacktraces()) {
|
||||
if (Subtarget.isR9Reserved())
|
||||
return ARM_GPR_AO_4;
|
||||
else
|
||||
return ARM_GPR_AO_3;
|
||||
} else {
|
||||
if (Subtarget.isR9Reserved())
|
||||
return ARM_GPR_AO_2;
|
||||
else
|
||||
return ARM_GPR_AO_1;
|
||||
}
|
||||
}
|
||||
|
||||
GPRClass::iterator
|
||||
GPRClass::allocation_order_end(const MachineFunction &MF) const {
|
||||
const TargetMachine &TM = MF.getTarget();
|
||||
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
|
||||
GPRClass::iterator I;
|
||||
if (Subtarget.isThumb())
|
||||
I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
|
||||
else if (Subtarget.useThumbBacktraces()) {
|
||||
if (Subtarget.isR9Reserved())
|
||||
I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
|
||||
else
|
||||
I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
|
||||
} else {
|
||||
if (Subtarget.isR9Reserved())
|
||||
I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
|
||||
else
|
||||
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
|
||||
}
|
||||
|
||||
return hasFP(MF) ? I-1 : I;
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
def FPRegs : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
|
||||
def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
|
||||
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
|
||||
S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
|
||||
|
||||
def DFPRegs : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
|
||||
D8, D9, D10, D11, D12, D13, D14, D15]>;
|
||||
// ARM requires only word alignment for double. It's more performant if it
|
||||
// is double-word alignment though.
|
||||
def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
|
||||
D9, D10, D11, D12, D13, D14, D15]>;
|
||||
|
52
lib/Target/ARM/ARMSubtarget.cpp
Normal file
52
lib/Target/ARM/ARMSubtarget.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMSubtarget.h"
|
||||
#include "ARMGenSubtarget.inc"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
// FIXME: this is temporary.
|
||||
static cl::opt<bool> Thumb("enable-thumb",
|
||||
cl::desc("Switch to thumb mode in ARM backend"));
|
||||
|
||||
ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS)
|
||||
: ARMArchVersion(V4T), HasVFP2(false), IsDarwin(false),
|
||||
UseThumbBacktraces(false), IsR9Reserved(false), stackAlignment(8) {
|
||||
|
||||
// Determine default and user specified characteristics
|
||||
std::string CPU = "generic";
|
||||
|
||||
// Parse features string.
|
||||
ParseSubtargetFeatures(FS, CPU);
|
||||
|
||||
IsThumb = Thumb;
|
||||
|
||||
// Set the boolean corresponding to the current target triple, or the default
|
||||
// if one cannot be determined, to true.
|
||||
const std::string& TT = M.getTargetTriple();
|
||||
if (TT.length() > 5) {
|
||||
IsDarwin = TT.find("-darwin") != std::string::npos;
|
||||
} else if (TT.empty()) {
|
||||
#if defined(__APPLE__)
|
||||
IsDarwin = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IsDarwin) {
|
||||
UseThumbBacktraces = true;
|
||||
IsR9Reserved = true;
|
||||
stackAlignment = 4;
|
||||
}
|
||||
}
|
82
lib/Target/ARM/ARMSubtarget.h
Normal file
82
lib/Target/ARM/ARMSubtarget.h
Normal file
@ -0,0 +1,82 @@
|
||||
//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Evan Cheng and is distributed under the
|
||||
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the ARM specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARMSUBTARGET_H
|
||||
#define ARMSUBTARGET_H
|
||||
|
||||
#include "llvm/Target/TargetSubtarget.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
class Module;
|
||||
|
||||
class ARMSubtarget : public TargetSubtarget {
|
||||
protected:
|
||||
enum ARMArchEnum {
|
||||
V4T, V5T, V5TE, V6
|
||||
};
|
||||
|
||||
/// ARMArchVersion - ARM architecture vecrsion: V4T (base), V5T, V5TE,
|
||||
/// and V6.
|
||||
ARMArchEnum ARMArchVersion;
|
||||
|
||||
/// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
|
||||
/// instructions.
|
||||
bool HasVFP2;
|
||||
|
||||
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
|
||||
bool IsThumb;
|
||||
|
||||
bool IsDarwin;
|
||||
|
||||
/// UseThumbBacktraces - True if we use thumb style backtraces.
|
||||
bool UseThumbBacktraces;
|
||||
|
||||
/// IsR9Reserved - True if R9 is a not available as general purpose register.
|
||||
bool IsR9Reserved;
|
||||
|
||||
/// stackAlignment - The minimum alignment known to hold of the stack frame on
|
||||
/// entry to the function and which must be maintained by every function.
|
||||
unsigned stackAlignment;
|
||||
|
||||
public:
|
||||
/// This constructor initializes the data members to match that
|
||||
/// of the specified module.
|
||||
///
|
||||
ARMSubtarget(const Module &M, const std::string &FS);
|
||||
|
||||
/// ParseSubtargetFeatures - Parses features string setting specified
|
||||
/// subtarget options. Definition of function is auto generated by tblgen.
|
||||
void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
|
||||
|
||||
bool hasV4TOps() const { return ARMArchVersion >= V4T; }
|
||||
bool hasV5TOps() const { return ARMArchVersion >= V5T; }
|
||||
bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
|
||||
bool hasV6Ops() const { return ARMArchVersion >= V6; }
|
||||
|
||||
bool hasVFP2() const { return HasVFP2; }
|
||||
|
||||
bool isDarwin() const { return IsDarwin; }
|
||||
bool isThumb() const { return IsThumb; }
|
||||
|
||||
bool useThumbBacktraces() const { return UseThumbBacktraces; }
|
||||
bool isR9Reserved() const { return IsR9Reserved; }
|
||||
|
||||
/// getStackAlignment - Returns the minimum alignment known to hold of the
|
||||
/// stack frame on entry to the function and which must be maintained by every
|
||||
/// function for this subtarget.
|
||||
unsigned getStackAlignment() const { return stackAlignment; }
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // ARMSUBTARGET_H
|
@ -12,18 +12,50 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMTargetAsmInfo.h"
|
||||
|
||||
#include "ARMTargetMachine.h"
|
||||
using namespace llvm;
|
||||
|
||||
ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) {
|
||||
Data16bitsDirective = "\t.half\t";
|
||||
Data32bitsDirective = "\t.word\t";
|
||||
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
|
||||
if (Subtarget->isDarwin()) {
|
||||
HasDotTypeDotSizeDirective = false;
|
||||
PrivateGlobalPrefix = "L";
|
||||
GlobalPrefix = "_";
|
||||
ZeroDirective = "\t.space\t";
|
||||
SetDirective = "\t.set";
|
||||
WeakRefDirective = "\t.weak_reference\t";
|
||||
JumpTableDataSection = ".const";
|
||||
CStringSection = "\t.cstring";
|
||||
StaticCtorsSection = ".mod_init_func";
|
||||
StaticDtorsSection = ".mod_term_func";
|
||||
InlineAsmStart = "@ InlineAsm Start";
|
||||
InlineAsmEnd = "@ InlineAsm End";
|
||||
LCOMMDirective = "\t.lcomm\t";
|
||||
COMMDirectiveTakesAlignment = false;
|
||||
|
||||
NeedsSet = true;
|
||||
DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
|
||||
DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
|
||||
DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
|
||||
DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
|
||||
DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
|
||||
DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
|
||||
DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
|
||||
DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
|
||||
DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
|
||||
DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
|
||||
DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
|
||||
} else {
|
||||
Data16bitsDirective = "\t.half\t";
|
||||
Data32bitsDirective = "\t.word\t";
|
||||
ZeroDirective = "\t.skip\t";
|
||||
WeakRefDirective = "\t.weak\t";
|
||||
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
|
||||
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
|
||||
}
|
||||
AlignmentIsInBytes = false;
|
||||
Data64bitsDirective = 0;
|
||||
ZeroDirective = "\t.skip\t";
|
||||
CommentString = "@";
|
||||
DataSection = "\t.data";
|
||||
ConstantPoolSection = "\t.text\n";
|
||||
AlignmentIsInBytes = false;
|
||||
WeakRefDirective = "\t.weak\t";
|
||||
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
|
||||
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
|
||||
}
|
||||
|
@ -11,30 +11,32 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMTargetAsmInfo.h"
|
||||
#include "ARMTargetMachine.h"
|
||||
#include "ARMTargetAsmInfo.h"
|
||||
#include "ARMFrameInfo.h"
|
||||
#include "ARM.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Target/TargetMachineRegistry.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Hidden boolean command-line option "disable-arm-loadstore-opti". When set,
// addPreEmitPass() in this file does not add the ARM load/store optimization
// pass to the pipeline.
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
|
||||
cl::desc("Disable load store optimization pass"));
|
||||
|
||||
namespace {
|
||||
// Register the target.
|
||||
RegisterTarget<ARMTargetMachine> X("arm", " ARM");
|
||||
}
|
||||
|
||||
|
||||
/// createTargetAsmInfo - Allocates a new ARMTargetAsmInfo constructed from
/// this target machine and returns a pointer to it.
// NOTE(review): the object is created with `new`; which component deletes it
// is not visible here -- confirm against the caller.
const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
|
||||
return new ARMTargetAsmInfo(*this);
|
||||
}
|
||||
|
||||
|
||||
/// TargetMachine ctor - Create an ILP32 architecture model
|
||||
///
|
||||
ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
|
||||
: DataLayout("e-p:32:32") {
|
||||
: Subtarget(M, FS), DataLayout("e-p:32:32-d:32"), InstrInfo(Subtarget),
|
||||
FrameInfo(Subtarget) {
|
||||
if (Subtarget.isDarwin())
|
||||
NoFramePointerElim = true;
|
||||
}
|
||||
|
||||
unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
|
||||
@ -49,14 +51,23 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
|
||||
}
|
||||
|
||||
|
||||
/// createTargetAsmInfo - Allocates a new ARMTargetAsmInfo constructed from
/// this target machine and returns a pointer to it.
// NOTE(review): the object is created with `new`; which component deletes it
// is not visible here -- confirm against the caller.
const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
|
||||
return new ARMTargetAsmInfo(*this);
|
||||
}
|
||||
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
/// addInstSelector - Adds the pass returned by createARMISelDag(*this) to PM.
/// The Fast parameter is not consulted. Always returns false.
// NOTE(review): presumably `false` tells the caller that setup succeeded --
// confirm against the LLVMTargetMachine hook contract.
bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
|
||||
PM.add(createARMISelDag(*this));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARMTargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) {
|
||||
PM.add(createARMFixMulPass());
|
||||
/// addPreEmitPass - Adds the ARM-specific passes for the pre-emit stage to PM.
/// The load/store optimization pass is added only when Fast is false, the
/// DisableLdStOpti option is not set, and the subtarget is not in Thumb mode.
/// The constant island pass is always added, after it. Always returns true.
// NOTE(review): the meaning of the `true` return value is defined by the
// caller, which is not visible here -- confirm.
bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
|
||||
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
|
||||
if (!Fast && !DisableLdStOpti && !Subtarget.isThumb())
|
||||
PM.add(createARMLoadStoreOptimizationPass());
|
||||
|
||||
PM.add(createARMConstantIslandPass());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -20,19 +20,17 @@
|
||||
#include "llvm/Target/TargetFrameInfo.h"
|
||||
#include "ARMInstrInfo.h"
|
||||
#include "ARMFrameInfo.h"
|
||||
#include "ARMSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Module;
|
||||
|
||||
class ARMTargetMachine : public LLVMTargetMachine {
|
||||
const TargetData DataLayout; // Calculates type size & alignment
|
||||
ARMInstrInfo InstrInfo;
|
||||
ARMFrameInfo FrameInfo;
|
||||
|
||||
protected:
|
||||
virtual const TargetAsmInfo *createTargetAsmInfo() const;
|
||||
|
||||
ARMSubtarget Subtarget;
|
||||
const TargetData DataLayout; // Calculates type size & alignment
|
||||
ARMInstrInfo InstrInfo;
|
||||
ARMFrameInfo FrameInfo;
|
||||
public:
|
||||
ARMTargetMachine(const Module &M, const std::string &FS);
|
||||
|
||||
@ -42,11 +40,14 @@ public:
|
||||
return &InstrInfo.getRegisterInfo();
|
||||
}
|
||||
virtual const TargetData *getTargetData() const { return &DataLayout; }
|
||||
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
|
||||
static unsigned getModuleMatchQuality(const Module &M);
|
||||
|
||||
virtual const TargetAsmInfo *createTargetAsmInfo() const;
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
|
||||
virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
|
||||
virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
|
||||
virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
|
||||
std::ostream &Out);
|
||||
};
|
||||
|
@ -7,6 +7,7 @@
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../..
|
||||
LIBRARYNAME = LLVMARM
|
||||
TARGET = ARM
|
||||
@ -15,7 +16,6 @@ TARGET = ARM
|
||||
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
|
||||
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
|
||||
ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
|
||||
ARMGenDAGISel.inc
|
||||
ARMGenDAGISel.inc ARMGenSubtarget.inc
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
||||
|
||||
|
17
lib/Target/ARM/README-Thumb.txt
Normal file
17
lib/Target/ARM/README-Thumb.txt
Normal file
@ -0,0 +1,17 @@
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Random ideas for the ARM backend (Thumb specific).
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
* Add support for compiling functions in both ARM and Thumb mode, then taking
|
||||
the smaller of the two.
|
||||
* Add support for compiling individual basic blocks in thumb mode, when in a
|
||||
larger ARM function. This can be used for presumed cold code, like paths
|
||||
to abort (failure path of asserts), EH handling code, etc.
|
||||
|
||||
* Thumb doesn't have normal pre/post increment addressing modes, but you can
|
||||
load/store 32-bit integers with pre/postinc by using load/store multiple
|
||||
instrs with a single register.
|
||||
|
||||
* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
|
||||
and cmp instructions can use high registers. Also, we can use them as
|
||||
temporaries to spill values into.
|
@ -2,69 +2,438 @@
|
||||
// Random ideas for the ARM backend.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Consider implementing a select with two conditional moves:
|
||||
Reimplement 'select' in terms of 'SEL'.
|
||||
|
||||
cmp x, y
|
||||
moveq dst, a
|
||||
movne dst, b
|
||||
* We would really like to support UXTAB16, but we need to prove that the
|
||||
add doesn't need to overflow between the two 16-bit chunks.
|
||||
|
||||
----------------------------------------------------------
|
||||
* implement predication support
|
||||
* Implement pre/post increment support. (e.g. PR935)
|
||||
* Coalesce stack slots!
|
||||
* Implement smarter constant generation for binops with large immediates.
|
||||
|
||||
* Consider materializing FP constants like 0.0f and 1.0f using integer
|
||||
immediate instructions then copy to FPU. Slower than load into FPU?
|
||||
|
||||
%tmp1 = shl int %b, ubyte %c
|
||||
%tmp4 = add int %a, %tmp1
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
compiles to
|
||||
The constant island pass is extremely naive. If a constant pool entry is
|
||||
out of range, it *always* splits a block and inserts a copy of the cp
|
||||
entry inline. It should:
|
||||
|
||||
add r0, r0, r1, lsl r2
|
||||
1. Check to see if there is already a copy of this constant nearby. If so,
|
||||
reuse it.
|
||||
2. Instead of always splitting blocks to insert the constant, insert it in
|
||||
nearby 'water'.
|
||||
3. Constant island references should be ref counted. If a constant reference
|
||||
is out-of-range, and the last reference to a constant is relocated, the
|
||||
dead constant should be removed.
|
||||
|
||||
but
|
||||
This pass has all the framework needed to implement this, but it hasn't
|
||||
been done.
|
||||
|
||||
%tmp1 = shl int %b, ubyte %c
|
||||
%tmp4 = add int %tmp1, %a
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
compiles to
|
||||
mov r1, r1, lsl r2
|
||||
add r0, r1, r0
|
||||
We need to start generating predicated instructions. The .td files have a way
|
||||
to express this now (see the PPC conditional return instruction), but the
|
||||
branch folding pass (or a new if-cvt pass) should start producing these, at
|
||||
least in the trivial case.
|
||||
|
||||
---------------------------------------------------------
|
||||
%tmp1 = shl int %b, ubyte 4
|
||||
%tmp2 = add int %a, %tmp1
|
||||
Among the obvious wins, doing so can eliminate the need to custom expand
|
||||
copysign (i.e. we won't need to custom expand it to get the conditional
|
||||
negate).
|
||||
|
||||
compiles to
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
mov r2, #4
|
||||
add r0, r0, r1, lsl r2
|
||||
Implement long long "X-3" with instructions that fold the immediate in. These
|
||||
were disabled due to badness with the ARM carry flag on subtracts.
|
||||
|
||||
should be
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
add r0, r0, r1, lsl #4
|
||||
We currently compile abs:
|
||||
int foo(int p) { return p < 0 ? -p : p; }
|
||||
|
||||
----------------------------------------------------------
|
||||
into:
|
||||
|
||||
add an offset to FLDS/FLDD/FSTD/FSTS addressing mode
|
||||
_foo:
|
||||
rsb r1, r0, #0
|
||||
cmn r0, #1
|
||||
movgt r1, r0
|
||||
mov r0, r1
|
||||
bx lr
|
||||
|
||||
----------------------------------------------------------
|
||||
This is very, uh, literal. This could be a 3 operation sequence:
|
||||
t = (p sra 31);
|
||||
res = (p xor t)-t
|
||||
|
||||
the function
|
||||
Which would be better. This occurs in png decode.
|
||||
|
||||
void %f() {
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
More load / store optimizations:
|
||||
1) Look past instructions without side-effects (not load, store, branch, etc.)
|
||||
when forming the list of loads / stores to optimize.
|
||||
|
||||
2) Smarter register allocation?
|
||||
We are probably missing some opportunities to use ldm / stm. Consider:
|
||||
|
||||
ldr r5, [r0]
|
||||
ldr r4, [r0, #4]
|
||||
|
||||
This cannot be merged into a ldm. Perhaps we will need to do the transformation
|
||||
before register allocation. Then teach the register allocator to allocate a
|
||||
chunk of consecutive registers.
|
||||
|
||||
3) Better representation for block transfer? This is from Olden/power:
|
||||
|
||||
fldd d0, [r4]
|
||||
fstd d0, [r4, #+32]
|
||||
fldd d0, [r4, #+8]
|
||||
fstd d0, [r4, #+40]
|
||||
fldd d0, [r4, #+16]
|
||||
fstd d0, [r4, #+48]
|
||||
fldd d0, [r4, #+24]
|
||||
fstd d0, [r4, #+56]
|
||||
|
||||
If we can spare the registers, it would be better to use fldm and fstm here.
|
||||
Need major register allocator enhancement though.
|
||||
|
||||
4) Can we recognize the relative position of constantpool entries? i.e. Treat
|
||||
|
||||
ldr r0, LCPI17_3
|
||||
ldr r1, LCPI17_4
|
||||
ldr r2, LCPI17_5
|
||||
|
||||
as
|
||||
ldr r0, LCPI17
|
||||
ldr r1, LCPI17+4
|
||||
ldr r2, LCPI17+8
|
||||
|
||||
Then the ldr's can be combined into a single ldm. See Olden/power.
|
||||
|
||||
Note that for ARM v4, gcc uses ldmia to load a pair of 32-bit values to represent a
|
||||
double 64-bit FP constant:
|
||||
|
||||
adr r0, L6
|
||||
ldmia r0, {r0-r1}
|
||||
|
||||
.align 2
|
||||
L6:
|
||||
.long -858993459
|
||||
.long 1074318540
|
||||
|
||||
5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
|
||||
ldrd/strd if there are only two destination registers that form an
|
||||
odd/even pair. However, we probably would pay a penalty if the address is not
|
||||
aligned on 8-byte boundary. This requires more information on load / store
|
||||
nodes (and MI's?) than we currently carry.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
* Consider this silly example:
|
||||
|
||||
double bar(double x) {
|
||||
double r = foo(3.1);
|
||||
return x+r;
|
||||
}
|
||||
|
||||
_bar:
|
||||
sub sp, sp, #16
|
||||
str r4, [sp, #+12]
|
||||
str r5, [sp, #+8]
|
||||
str lr, [sp, #+4]
|
||||
mov r4, r0
|
||||
mov r5, r1
|
||||
ldr r0, LCPI2_0
|
||||
bl _foo
|
||||
fmsr f0, r0
|
||||
fcvtsd d0, f0
|
||||
fmdrr d1, r4, r5
|
||||
faddd d0, d0, d1
|
||||
fmrrd r0, r1, d0
|
||||
ldr lr, [sp, #+4]
|
||||
ldr r5, [sp, #+8]
|
||||
ldr r4, [sp, #+12]
|
||||
add sp, sp, #16
|
||||
bx lr
|
||||
|
||||
Ignore the prologue and epilogue stuff for a second. Note
|
||||
mov r4, r0
|
||||
mov r5, r1
|
||||
the copies to callee-save registers and the fact they are only being used by the
|
||||
fmdrr instruction. It would have been better had the fmdrr been scheduled
|
||||
before the call and its result placed in a callee-save DPR register. The two
|
||||
mov ops would not have been necessary.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Calling convention related stuff:
|
||||
|
||||
* gcc's parameter passing implementation is terrible and we suffer as a result:
|
||||
|
||||
e.g.
|
||||
struct s {
|
||||
double d1;
|
||||
int s1;
|
||||
};
|
||||
|
||||
void foo(struct s S) {
|
||||
printf("%g, %d\n", S.d1, S.s1);
|
||||
}
|
||||
|
||||
'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
|
||||
then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
|
||||
address of the format string):
|
||||
|
||||
stmfd sp!, {r7, lr}
|
||||
add r7, sp, #0
|
||||
sub sp, sp, #12
|
||||
stmia sp, {r0, r1, r2}
|
||||
ldmia sp, {r1-r2}
|
||||
ldr r0, L5
|
||||
ldr r3, [sp, #8]
|
||||
L2:
|
||||
add r0, pc, r0
|
||||
bl L_printf$stub
|
||||
|
||||
Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
|
||||
|
||||
* Returning an aggregate type is even worse:
|
||||
|
||||
e.g.
|
||||
struct s foo(void) {
|
||||
struct s S = {1.1, 2};
|
||||
return S;
|
||||
}
|
||||
|
||||
mov ip, r0
|
||||
ldr r0, L5
|
||||
sub sp, sp, #12
|
||||
L2:
|
||||
add r0, pc, r0
|
||||
@ lr needed for prologue
|
||||
ldmia r0, {r0, r1, r2}
|
||||
stmia sp, {r0, r1, r2}
|
||||
stmia ip, {r0, r1, r2}
|
||||
mov r0, ip
|
||||
add sp, sp, #12
|
||||
bx lr
|
||||
|
||||
r0 (and later ip) is the hidden parameter from caller to store the value in. The
|
||||
first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1,
|
||||
r2 into the address passed in. However, there is one additional stmia that
|
||||
stores r0, r1, and r2 to some stack location. The store is dead.
|
||||
|
||||
The llvm-gcc generated code looks like this:
|
||||
|
||||
csretcc void %foo(%struct.s* %agg.result) {
|
||||
entry:
|
||||
call void %g( int 1, int 2, int 3, int 4, int 5 )
|
||||
%S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
|
||||
%memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
|
||||
cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
|
||||
call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
|
||||
cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
|
||||
call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
|
||||
cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
|
||||
call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void %g(int, int, int, int, int)
|
||||
llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
|
||||
constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
|
||||
into a number of loads and stores, or 2) custom lower memcpy (of small size) to
|
||||
be ldmia / stmia. I think option 2 is better but the current register
|
||||
allocator cannot allocate a chunk of registers at a time.
|
||||
|
||||
Only needs 8 bytes of stack space. We currently allocate 16.
|
||||
A feasible temporary solution is to use specific physical registers at the
|
||||
lowering time for small (<= 4 words?) transfer size.
|
||||
|
||||
----------------------------------------------------------
|
||||
* ARM CSRet calling convention requires the hidden argument to be returned by
|
||||
the callee.
|
||||
|
||||
32 x 32 -> 64 multiplications currently use two instructions. We
|
||||
should try to declare smull and umull as returning two values.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
----------------------------------------------------------
|
||||
We can definitely do a better job on BB placements to eliminate some branches.
|
||||
It's very common to see llvm generated assembly code that looks like this:
|
||||
|
||||
Implement addressing modes 2 (ldrb) and 3 (ldrsb)
|
||||
LBB3:
|
||||
...
|
||||
LBB4:
|
||||
...
|
||||
beq LBB3
|
||||
b LBB2
|
||||
|
||||
----------------------------------------------------------
|
||||
If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
|
||||
then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
|
||||
|
||||
See McCat/18-imp/ComputeBoundingBoxes for an example.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We need register scavenging. Currently, the 'ip' register is reserved in case
|
||||
frame indexes are too big. This means that we generate extra code for stuff
|
||||
like this:
|
||||
|
||||
void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) {
|
||||
short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
|
||||
*a = x * Rconst;
|
||||
*b = y * Rconst;
|
||||
*c = z * Rconst;
|
||||
}
|
||||
|
||||
we compile it to:
|
||||
|
||||
_foo:
|
||||
*** stmfd sp!, {r4, r7}
|
||||
*** add r7, sp, #4
|
||||
mov r4, #186
|
||||
orr r4, r4, #89, 24 @ 22784
|
||||
mul r0, r0, r4
|
||||
str r0, [r3]
|
||||
mul r0, r1, r4
|
||||
ldr r1, [sp, #+8]
|
||||
str r0, [r1]
|
||||
mul r0, r2, r4
|
||||
ldr r1, [sp, #+12]
|
||||
str r0, [r1]
|
||||
*** sub sp, r7, #4
|
||||
*** ldmfd sp!, {r4, r7}
|
||||
bx lr
|
||||
|
||||
GCC produces:
|
||||
|
||||
_foo:
|
||||
ldr ip, L4
|
||||
mul r0, ip, r0
|
||||
mul r1, ip, r1
|
||||
str r0, [r3, #0]
|
||||
ldr r3, [sp, #0]
|
||||
mul r2, ip, r2
|
||||
str r1, [r3, #0]
|
||||
ldr r3, [sp, #4]
|
||||
str r2, [r3, #0]
|
||||
bx lr
|
||||
L4:
|
||||
.long 22970
|
||||
|
||||
This is apparently all because we couldn't use ip here.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Pre-/post- indexed load / stores:
|
||||
|
||||
1) We should not make the pre/post- indexed load/store transform if the base ptr
|
||||
is guaranteed to be live beyond the load/store. This can happen if the base
|
||||
ptr is live out of the block we are performing the optimization. e.g.
|
||||
|
||||
mov r1, r2
|
||||
ldr r3, [r1], #4
|
||||
...
|
||||
|
||||
vs.
|
||||
|
||||
ldr r3, [r2]
|
||||
add r1, r2, #4
|
||||
...
|
||||
|
||||
In most cases, this is just a wasted optimization. However, sometimes it can
|
||||
negatively impact the performance because two-address code is more restrictive
|
||||
when it comes to scheduling.
|
||||
|
||||
Unfortunately, liveout information is currently unavailable during DAG combine
|
||||
time.
|
||||
|
||||
2) Consider splitting an indexed load / store into a pair of add/sub + load/store
|
||||
to solve #1 (in TwoAddressInstructionPass.cpp).
|
||||
|
||||
3) Enhance LSR to generate more opportunities for indexed ops.
|
||||
|
||||
4) Once we add support for multiple result patterns, write indexed load
|
||||
patterns instead of C++ instruction selection code.
|
||||
|
||||
5) Use FLDM / FSTM to emulate indexed FP load / store.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We should add i64 support to take advantage of the 64-bit load / stores.
|
||||
We can add a pseudo i64 register class containing pseudo registers that are
|
||||
register pairs. All other ops (e.g. add, sub) would be expanded as usual.
|
||||
|
||||
We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
|
||||
from the i64 register. These are single moves which can be eliminated if the
|
||||
destination register is a sub-register of the source. We should implement proper
|
||||
subreg support in the register allocator to coalesce these away.
|
||||
|
||||
There are other minor issues such as multiple instructions for a spill / restore
|
||||
/ move.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Implement support for some more tricky ways to materialize immediates. For
|
||||
example, to get 0xffff8000, we can use:
|
||||
|
||||
mov r9, #&3f8000
|
||||
sub r9, r9, #&400000
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We sometimes generate multiple add / sub instructions to update sp in prologue
|
||||
and epilogue if the inc / dec value is too large to fit in a single immediate
|
||||
operand. In some cases, perhaps it might be better to load the value from a
|
||||
constantpool instead.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
GCC generates significantly better code for this function.
|
||||
|
||||
int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
|
||||
int i = 0;
|
||||
|
||||
if (StackPtr != 0) {
|
||||
while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
|
||||
Line[i++] = Stack[--StackPtr];
|
||||
if (LineLen > 32768)
|
||||
{
|
||||
while (StackPtr != 0 && i < LineLen)
|
||||
{
|
||||
i++;
|
||||
--StackPtr;
|
||||
}
|
||||
}
|
||||
}
|
||||
return StackPtr;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
This should compile to the mlas instruction:
|
||||
int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
At some point, we should triage these to see if they still apply to us:
|
||||
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
|
||||
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
|
||||
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
|
||||
|
||||
http://www.inf.u-szeged.hu/gcc-arm/
|
||||
http://citeseer.ist.psu.edu/debus04linktime.html
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
Loading…
Reference in New Issue
Block a user