[SystemZ] Add long branch pass

Before this change, the SystemZ backend would use BRCL for all branches
and only consider shortening them to BRC when generating an object file.
E.g. a branch on equal would use the JGE alias of BRCL in assembly output,
but might be shortened to the JE alias of BRC in ELF output.  This was
a useful first step, but it had two problems:

(1) The z assembler isn't traditionally supposed to perform branch shortening
    or branch relaxation.  We followed this rule by not relaxing branches
    in assembler input, but that meant that generating assembly code and
    then assembling it would not produce the same result as going directly
    to object code; the former would give long branches everywhere, whereas
    the latter would use short branches where possible.

(2) Other useful branches, like COMPARE AND BRANCH, do not have long forms.
    We would need to do something else before supporting them.

    (Although COMPARE AND BRANCH does not change the condition codes,
    the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction
    during codegen, so that we can safely lower it to a separate compare
    and long branch where necessary.  This is not a valid transformation
    for the assembler proper to make.)

This patch therefore moves branch relaxation to a pre-emit pass.
For now, calls are still shortened from BRASL to BRAS by the assembler,
although this too is not really the traditional behaviour.

The first test takes about 1.5s to run, and there are likely to be
more tests in this vein once further branch types are added.  The feeling
on IRC was that 1.5s is a bit much for a single test, so I've restricted
it to SystemZ hosts for now.

The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests.
A later patch will remove the {{g}}s from that directory.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182274 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford
2013-05-20 14:23:08 +00:00
parent e932e891e5
commit 44b486ed78
17 changed files with 599 additions and 37 deletions

View File

@@ -19,6 +19,7 @@ add_llvm_target(SystemZCodeGen
SystemZISelDAGToDAG.cpp SystemZISelDAGToDAG.cpp
SystemZISelLowering.cpp SystemZISelLowering.cpp
SystemZInstrInfo.cpp SystemZInstrInfo.cpp
SystemZLongBranch.cpp
SystemZMCInstLower.cpp SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp SystemZRegisterInfo.cpp
SystemZSubtarget.cpp SystemZSubtarget.cpp

View File

@@ -35,11 +35,10 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
llvm_unreachable("Unknown fixup kind!"); llvm_unreachable("Unknown fixup kind!");
} }
// If Opcode can be relaxed, return the relaxed form, otherwise return 0. // If Opcode is a relaxable interprocedural reference, return the relaxed form,
// otherwise return 0.
static unsigned getRelaxedOpcode(unsigned Opcode) { static unsigned getRelaxedOpcode(unsigned Opcode) {
switch (Opcode) { switch (Opcode) {
case SystemZ::BRC: return SystemZ::BRCL;
case SystemZ::J: return SystemZ::JG;
case SystemZ::BRAS: return SystemZ::BRASL; case SystemZ::BRAS: return SystemZ::BRASL;
} }
return 0; return 0;

View File

@@ -56,11 +56,7 @@ and conditional returns.
-- --
We don't use the combined COMPARE AND BRANCH instructions. Using them We don't use the combined COMPARE AND BRANCH instructions.
would require a change to the way we handle out-of-range branches.
At the moment, we start with 32-bit forms like BRCL and shorten them
to forms like BRC where possible, but COMPARE AND BRANCH does not have
a 32-bit form.
-- --

View File

@@ -73,5 +73,6 @@ namespace llvm {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel); CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
} // end namespace llvm; } // end namespace llvm;
#endif #endif

View File

@@ -1632,7 +1632,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// jCC JoinMBB // jCC JoinMBB
// # fallthrough to FalseMBB // # fallthrough to FalseMBB
MBB = StartMBB; MBB = StartMBB;
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB); MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB); MBB->addSuccessor(FalseMBB);
@@ -1769,7 +1769,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest) BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB); MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB); MBB->addSuccessor(DoneMBB);
@@ -1846,7 +1846,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
// %RotatedOldVal = RLL %OldVal, 0(%BitShift) // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
// CompareOpcode %RotatedOldVal, %Src2 // CompareOpcode %RotatedOldVal, %Src2
// BRCL KeepOldMask, UpdateMBB // BRC KeepOldMask, UpdateMBB
MBB = LoopMBB; MBB = LoopMBB;
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
.addReg(OrigVal).addMBB(StartMBB) .addReg(OrigVal).addMBB(StartMBB)
@@ -1856,7 +1856,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
.addReg(OldVal).addReg(BitShift).addImm(0); .addReg(OldVal).addReg(BitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CompareOpcode)) BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2); .addReg(RotatedOldVal).addReg(Src2);
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)) BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(KeepOldMask).addMBB(UpdateMBB); .addImm(KeepOldMask).addMBB(UpdateMBB);
MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UpdateMBB);
MBB->addSuccessor(UseAltMBB); MBB->addSuccessor(UseAltMBB);
@@ -1887,7 +1887,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest) BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB); MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB); MBB->addSuccessor(DoneMBB);
@@ -1978,7 +1978,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
.addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
BuildMI(MBB, DL, TII->get(SystemZ::CR)) BuildMI(MBB, DL, TII->get(SystemZ::CR))
.addReg(Dest).addReg(RetryCmpVal); .addReg(Dest).addReg(RetryCmpVal);
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(DoneMBB);
MBB->addSuccessor(DoneMBB); MBB->addSuccessor(DoneMBB);
MBB->addSuccessor(SetMBB); MBB->addSuccessor(SetMBB);
@@ -1998,7 +1998,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
.addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB); MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB); MBB->addSuccessor(DoneMBB);

View File

@@ -13,6 +13,7 @@
#include "SystemZInstrInfo.h" #include "SystemZInstrInfo.h"
#include "SystemZInstrBuilder.h" #include "SystemZInstrBuilder.h"
#include "llvm/Target/TargetMachine.h"
#define GET_INSTRINFO_CTOR #define GET_INSTRINFO_CTOR
#define GET_INSTRMAP_INFO #define GET_INSTRMAP_INFO
@@ -229,19 +230,19 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (Cond.empty()) { if (Cond.empty()) {
// Unconditional branch? // Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!"); assert(!FBB && "Unconditional branch with multiple successors!");
BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB); BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB);
return 1; return 1;
} }
// Conditional branch. // Conditional branch.
unsigned Count = 0; unsigned Count = 0;
unsigned CC = Cond[0].getImm(); unsigned CC = Cond[0].getImm();
BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB); BuildMI(&MBB, DL, get(SystemZ::BRC)).addImm(CC).addMBB(TBB);
++Count; ++Count;
if (FBB) { if (FBB) {
// Two-way Conditional branch. Insert the second branch. // Two-way Conditional branch. Insert the second branch.
BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB); BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB);
++Count; ++Count;
} }
return Count; return Count;
@@ -348,6 +349,15 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false; return false;
} }
// Return the size of MI in bytes.  Ordinary instructions report the size
// recorded in their descriptor; inline asm has no fixed size, so its length
// is estimated from the asm string using the target's MCAsmInfo.
uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
  if (MI->getOpcode() != TargetOpcode::INLINEASM)
    return MI->getDesc().getSize();

  // Operand 0 of an INLINEASM instruction is the asm string.
  const MachineFunction &Fn = *MI->getParent()->getParent();
  return getInlineAsmLength(MI->getOperand(0).getSymbolName(),
                            *Fn.getTarget().getMCAsmInfo());
}
bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond, bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
const MachineOperand *&Target) const { const MachineOperand *&Target) const {
switch (MI->getOpcode()) { switch (MI->getOpcode()) {

View File

@@ -93,6 +93,9 @@ public:
// Return the SystemZRegisterInfo, which this class owns. // Return the SystemZRegisterInfo, which this class owns.
const SystemZRegisterInfo &getRegisterInfo() const { return RI; } const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
// Return the size in bytes of MI.
uint64_t getInstSizeInBytes(const MachineInstr *MI) const;
// Return true if MI is a conditional or unconditional branch. // Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code // When returning true, set Cond to the mask of condition-code
// values on which the instruction will branch, and set Target // values on which the instruction will branch, and set Target

View File

@@ -45,16 +45,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), def BR : InstRR<0x07, (outs), (ins ADDR64:$R2),
"br\t$R2", [(brind ADDR64:$R2)]>; "br\t$R2", [(brind ADDR64:$R2)]>;
// An assembler extended mnemonic for BRC. Use a separate instruction for // An assembler extended mnemonic for BRC.
// the asm parser, so that we don't relax Js to external symbols into JGs. def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2",
let isCodeGenOnly = 1 in [(br bb:$I2)]>;
def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>;
def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>;
// An assembler extended mnemonic for BRCL. (The extension is "G" // An assembler extended mnemonic for BRCL. (The extension is "G"
// rather than "L" because "JL" is "Jump if Less".) // rather than "L" because "JL" is "Jump if Less".)
def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
"jg\t$I2", [(br bb:$I2)]>;
} }
// Conditional branches. It's easier for LLVM to handle these branches // Conditional branches. It's easier for LLVM to handle these branches
@@ -71,7 +68,7 @@ let isCodeGenOnly = 1 in
defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">; defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">;
defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">; defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">;
def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRC cond4:$cond, bb:$dst)>;
// Define AsmParser mnemonics for each condition code. // Define AsmParser mnemonics for each condition code.
multiclass CondExtendedMnemonic<bits<4> Cond, string name> { multiclass CondExtendedMnemonic<bits<4> Cond, string name> {

View File

@@ -0,0 +1,357 @@
//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass makes sure that all branches are in range. There are several ways
// in which this could be done. One aggressive approach is to assume that all
// branches are in range and successively replace those that turn out not
// to be in range with a longer form (branch relaxation). A simple
// implementation is to continually walk through the function relaxing
// branches until no more changes are needed and a fixed point is reached.
// However, in the pathological worst case, this implementation is
// quadratic in the number of blocks; relaxing branch N can make branch N-1
// go out of range, which in turn can make branch N-2 go out of range,
// and so on.
//
// An alternative approach is to assume that all branches must be
// converted to their long forms, then reinstate the short forms of
// branches that, even under this pessimistic assumption, turn out to be
// in range (branch shortening). This too can be implemented as a function
// walk that is repeated until a fixed point is reached. In general,
// the result of shortening is not as good as that of relaxation, and
// shortening is also quadratic in the worst case; shortening branch N
// can bring branch N-1 in range of the short form, which in turn can do
// the same for branch N-2, and so on. The main advantage of shortening
// is that each walk through the function produces valid code, so it is
// possible to stop at any point after the first walk. The quadraticness
// could therefore be handled with a maximum pass count, although the
// question then becomes: what maximum count should be used?
//
// On SystemZ, long branches are only needed for functions bigger than 64k,
// which are relatively rare to begin with, and the long branch sequences
// are actually relatively cheap. It therefore doesn't seem worth spending
// much compilation time on the problem. Instead, the approach we take is:
//
// (1) Check whether all branches can be short (the usual case). Exit the
// pass if so.
// (2) If one branch needs to be long, work out the address that each block
// would have if all branches need to be long, as for shortening above.
// (3) Relax any branch that is out of range according to this pessimistic
// assumption.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "systemz-long-branch"
#include "SystemZTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(LongBranches, "Number of long branches.");
namespace {
typedef MachineBasicBlock::iterator Iter;
// Layout record for one basic block, used when estimating branch distances.
struct MBBInfo {
  // Address currently assumed for the start of the block, measured from
  // the start of the function.  Addresses are chosen so that subtracting
  // one from another yields a conservative upper bound on the distance
  // between the two points.
  uint64_t Address;

  // Number of bytes occupied by the block's non-terminator instructions.
  // Fixed once computed.
  uint64_t Size;

  // Minimum alignment of the block, expressed as a log2 value.
  // Fixed once computed.
  unsigned Alignment;

  // How many terminator instructions the block ends with.
  // Fixed once computed.
  unsigned NumTerminators;

  // Start with every field zeroed.
  MBBInfo()
    : Address(0),
      Size(0),
      Alignment(0),
      NumTerminators(0) {}
};
// Represents the state of a block terminator.
struct TerminatorInfo {
  // If this terminator is a relaxable branch, this points to the branch
  // instruction, otherwise it is null.
  MachineInstr *Branch;

  // The current address of the terminator, in the same form as
  // MBBInfo::Address.
  uint64_t Address;

  // The current size of the terminator in bytes.
  uint64_t Size;

  // If Branch is nonnull, this is the number of the target block,
  // otherwise it is unused.
  unsigned TargetBlock;

  // If Branch is nonnull, this is the number of extra bytes that the
  // longest relaxed form occupies compared with the current form,
  // otherwise it is zero.
  unsigned ExtraRelaxSize;

  // Zero every field, including Address, so that a freshly constructed
  // record never carries an indeterminate value.
  TerminatorInfo()
    : Branch(0), Address(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {}
};
// Used to keep track of the current position while iterating over the blocks.
struct BlockPosition {
  // The offset from the start of the function, in the same form
  // as MBBInfo::Address.
  uint64_t Address;

  // The number of low bits in Address that are known to be the same
  // as the runtime address.
  unsigned KnownBits;

  // Start at offset zero with InitialAlignment (a log2 value) low bits known.
  // The constructor is explicit so that a plain integer is never silently
  // converted into a position.
  explicit BlockPosition(unsigned InitialAlignment)
    : Address(0), KnownBits(InitialAlignment) {}
};
class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
SystemZLongBranch(const SystemZTargetMachine &tm)
: MachineFunctionPass(ID),
TII(static_cast<const SystemZInstrInfo *>(tm.getInstrInfo())) {}
virtual const char *getPassName() const {
return "SystemZ Long Branch";
}
bool runOnMachineFunction(MachineFunction &F);
private:
void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
bool AssumeRelaxed);
TerminatorInfo describeTerminator(MachineInstr *MI);
uint64_t initMBBInfo();
bool mustRelaxBranch(const TerminatorInfo &Terminator);
bool mustRelaxABranch();
void setWorstCaseAddresses();
void relaxBranch(TerminatorInfo &Terminator);
void relaxBranches();
const SystemZInstrInfo *TII;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBs;
SmallVector<TerminatorInfo, 16> Terminators;
};
// Pass identifier handed to the MachineFunctionPass constructor.
char SystemZLongBranch::ID = 0;
// Largest distance in bytes between a branch and a target at a lower address
// for which mustRelaxBranch still accepts the short form.
// NOTE(review): presumably the backward reach of the 16-bit branch
// displacement used by J and BRC -- confirm against the ISA reference.
const uint64_t MaxBackwardRange = 0x10000;
// Largest distance in bytes between a branch and a target at the same or a
// higher address for which mustRelaxBranch still accepts the short form.
// runOnMachineFunction also uses it as the function size at or below which
// no relaxation is attempted.
const uint64_t MaxForwardRange = 0xfffe;
} // end of anonymous namespace
// Factory for the SystemZ long-branch pass.  The caller receives a newly
// allocated pass object.
FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
  SystemZLongBranch *Pass = new SystemZLongBranch(TM);
  return Pass;
}
// On entry, Position is the state just before Block.  Record Block's
// address and leave Position at the end of Block's non-terminator
// instructions.
void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
                                           MBBInfo &Block) {
  const uint64_t BlockAlign = uint64_t(1) << Block.Alignment;

  if (Position.KnownBits < Block.Alignment) {
    // Fewer low address bits are known than the block's alignment needs,
    // so be conservative and charge the worst-case padding.
    const uint64_t KnownAlign = uint64_t(1) << Position.KnownBits;
    Position.Address += BlockAlign - KnownAlign;
    Position.KnownBits = Block.Alignment;
  }

  // Round up to the block's alignment; this is where the block starts.
  const uint64_t LowBits = BlockAlign - 1;
  Block.Address = (Position.Address + LowBits) & ~LowBits;

  // Step over the non-terminator part of the block.
  Position.Address = Block.Address + Block.Size;
}
// On entry, Position is the state just before Terminator.  Record the
// terminator's address and advance Position past it.  When AssumeRelaxed
// is true, the terminator is counted at its relaxed size.
void SystemZLongBranch::skipTerminator(BlockPosition &Position,
                                       TerminatorInfo &Terminator,
                                       bool AssumeRelaxed) {
  Terminator.Address = Position.Address;

  uint64_t Advance = Terminator.Size;
  if (AssumeRelaxed)
    Advance += Terminator.ExtraRelaxSize;
  Position.Address += Advance;
}
// Build the TerminatorInfo record for terminator instruction MI.
TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
  TerminatorInfo Info;
  Info.Size = TII->getInstSizeInBytes(MI);

  // Only conditional and unconditional branches can be relaxed; any other
  // terminator keeps a null Branch.
  if (!MI->isConditionalBranch() && !MI->isUnconditionalBranch())
    return Info;

  // Find which operand holds the target block.
  unsigned TargetOpNo = 0;
  switch (MI->getOpcode()) {
  case SystemZ::J:
    // J relaxes to JG; the target is the only operand.
    TargetOpNo = 0;
    break;
  case SystemZ::BRC:
    // BRC relaxes to BRCL; operand 0 is the condition code mask.
    TargetOpNo = 1;
    break;
  default:
    llvm_unreachable("Unrecognized branch instruction");
  }

  Info.Branch = MI;
  Info.TargetBlock = MI->getOperand(TargetOpNo).getMBB()->getNumber();
  // Both long forms are 2 bytes longer than their short counterparts.
  Info.ExtraRelaxSize = 2;
  return Info;
}
// Populate MBBs and Terminators, assigning addresses as if no branch will
// need relaxing.  The return value is the function size under that
// assumption.
uint64_t SystemZLongBranch::initMBBInfo() {
  // Renumber so that block numbers can index MBBs directly.
  MF->RenumberBlocks();
  const unsigned BlockCount = MF->size();

  MBBs.clear();
  MBBs.resize(BlockCount);

  Terminators.clear();
  Terminators.reserve(BlockCount);

  BlockPosition Position(MF->getAlignment());
  for (unsigned Num = 0; Num != BlockCount; ++Num) {
    MachineBasicBlock *MBB = MF->getBlockNumbered(Num);
    MBBInfo &Block = MBBs[Num];

    // Cache the alignment for quick access later.
    Block.Alignment = MBB->getAlignment();

    // Sum the sizes of the leading non-terminator instructions.
    Iter I = MBB->begin();
    const Iter E = MBB->end();
    for (; I != E && !I->isTerminator(); ++I)
      Block.Size += TII->getInstSizeInBytes(I);
    skipNonTerminators(Position, Block);

    // Everything that remains must be a terminator or a debug value.
    for (; I != E; ++I) {
      if (I->isDebugValue())
        continue;
      assert(I->isTerminator() && "Terminator followed by non-terminator");
      Terminators.push_back(describeTerminator(I));
      skipTerminator(Position, Terminators.back(), false);
      ++Block.NumTerminators;
    }
  }

  return Position.Address;
}
// Return true if, under current assumptions, Terminator needs to be relaxed.
bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator) {
if (!Terminator.Branch)
return false;
const MBBInfo &Target = MBBs[Terminator.TargetBlock];
if (Target.Address < Terminator.Address) {
if (Terminator.Address - Target.Address <= MaxBackwardRange)
return false;
} else {
if (Target.Address - Terminator.Address <= MaxForwardRange)
return false;
}
return true;
}
// Return true if, under current assumptions, any terminator needs
// to be relaxed.
bool SystemZLongBranch::mustRelaxABranch() {
for (SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(),
TE = Terminators.end(); TI != TE; ++TI)
if (mustRelaxBranch(*TI))
return true;
return false;
}
// Set the address of each block on the assumption that all branches
// must be long.
void SystemZLongBranch::setWorstCaseAddresses() {
SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
BlockPosition Position(MF->getAlignment());
for (SmallVector<MBBInfo, 16>::iterator BI = MBBs.begin(), BE = MBBs.end();
BI != BE; ++BI) {
skipNonTerminators(Position, *BI);
for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
skipTerminator(Position, *TI, true);
++TI;
}
}
}
// Switch the branch behind Terminator to its long form and update the
// record to match.
void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
  MachineInstr *MI = Terminator.Branch;

  // Pick the long counterpart of the current opcode.
  unsigned LongOpcode = 0;
  switch (MI->getOpcode()) {
  case SystemZ::J:
    LongOpcode = SystemZ::JG;
    break;
  case SystemZ::BRC:
    LongOpcode = SystemZ::BRCL;
    break;
  default:
    llvm_unreachable("Unrecognized branch");
  }
  MI->setDesc(TII->get(LongOpcode));

  // The terminator now has its full size and cannot be relaxed again.
  Terminator.Size += Terminator.ExtraRelaxSize;
  Terminator.ExtraRelaxSize = 0;
  Terminator.Branch = 0;

  ++LongBranches;
}
// Relax any branches that need to be relaxed, under current assumptions.
void SystemZLongBranch::relaxBranches() {
for (SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(),
TE = Terminators.end(); TI != TE; ++TI)
if (mustRelaxBranch(*TI))
relaxBranch(*TI);
}
// Pass entry point.  Lay the function out assuming short branches and, only
// if some branch is then out of range, redo the layout pessimistically and
// relax whatever is still out of range.  Returns true if F was modified.
bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
  MF = &F;
  const uint64_t Size = initMBBInfo();

  // A function no larger than the forward range cannot contain an
  // out-of-range branch, so the per-branch scan is skipped in that case.
  if (Size > MaxForwardRange && mustRelaxABranch()) {
    setWorstCaseAddresses();
    relaxBranches();
    return true;
  }
  return false;
}

View File

@@ -15,15 +15,10 @@
using namespace llvm; using namespace llvm;
// Where relaxable pairs of reloc-generating instructions exist, // If Opcode is an interprocedural reference that can be shortened,
// we tend to use the longest form by default, since that produces // return the short form, otherwise return Opcode itself.
// correct assembly in cases where no relaxation is performed.
// If Opcode is one such instruction, return the opcode for the
// shortest possible form instead, otherwise return Opcode itself.
static unsigned getShortenedInstr(unsigned Opcode) { static unsigned getShortenedInstr(unsigned Opcode) {
switch (Opcode) { switch (Opcode) {
case SystemZ::BRCL: return SystemZ::BRC;
case SystemZ::JG: return SystemZ::J;
case SystemZ::BRASL: return SystemZ::BRAS; case SystemZ::BRASL: return SystemZ::BRAS;
} }
return Opcode; return Opcode;

View File

@@ -47,7 +47,8 @@ public:
return getTM<SystemZTargetMachine>(); return getTM<SystemZTargetMachine>();
} }
virtual bool addInstSelector(); virtual bool addInstSelector() LLVM_OVERRIDE;
virtual bool addPreEmitPass() LLVM_OVERRIDE;
}; };
} // end anonymous namespace } // end anonymous namespace
@@ -56,6 +57,11 @@ bool SystemZPassConfig::addInstSelector() {
return false; return false;
} }
bool SystemZPassConfig::addPreEmitPass() {
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
return true;
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM); return new SystemZPassConfig(this, PM);
} }

View File

@@ -0,0 +1,105 @@
# Test normal conditional branches in cases where the sheer number of
# instructions causes some branches to be out of range.
# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
# Construct:
#
# before0:
# conditional branch to after0
# ...
# beforeN:
# conditional branch to after0
# main:
# 0xffd8 bytes, from MVIY instructions
# conditional branch to main
# after0:
# ...
# conditional branch to main
# afterN:
#
# Each conditional branch sequence occupies 8 bytes if it uses a short branch
# and 10 if it uses a long one. The ones before "main:" have to take the branch
# length into account -- which is 4 bytes for short branches -- so the final
# (0x28 - 4) / 8 == 4 blocks can use short branches. The ones after "main:"
# do not, so the first 0x28 / 8 == 5 can use short branches. However,
# the conservative algorithm we use makes one branch unnecessarily long
# on each side.
#
# CHECK: c %r4, 0(%r3)
# CHECK: jge [[LABEL:\.L[^ ]*]]
# CHECK: c %r4, 4(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 8(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 12(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 16(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 20(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 24(%r3)
# CHECK: j{{g?}}e [[LABEL]]
# CHECK: c %r4, 28(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 32(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 36(%r3)
# CHECK: je [[LABEL]]
# ...main goes here...
# CHECK: c %r4, 100(%r3)
# CHECK: je [[LABEL:\.L[^ ]*]]
# CHECK: c %r4, 104(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 108(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 112(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 116(%r3)
# CHECK: j{{g?}}e [[LABEL]]
# CHECK: c %r4, 120(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 124(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 128(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 132(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 136(%r3)
# CHECK: jge [[LABEL]]
# Number of compare-and-branch blocks emitted on each side of "main".
branch_blocks = 10
# Upper bound passed to the store-emitting loop below, which steps by 6.
# NOTE(review): presumably 6 is the encoded size of each emitted store, so
# that the stores fill main_size bytes -- confirm.
main_size = 0xffd8
# Function header and entry block, which jumps straight to before0.
print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {'
print 'entry:'
print ' br label %before0'
print ''
# Emit the "before<i>" blocks.  Each loads element i of %stop, compares it
# with %limit and branches to after0 on equality, otherwise to the next
# block ("main" after the last one).
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
print ' %%bcur%d = load volatile i32 *%%bstop%d' % (i, i)
print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ''
print '%s:' % next
# Emit volatile byte stores whose offsets and values follow a Fibonacci-style
# recurrence, so each store has a different offset and value.
a, b = 1, 1
for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
# Emit the trailing compare-and-branch sequences.  Each loads element i + 25
# of %stop and branches back to main on equality, otherwise on to after<i>.
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25)
print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
print ''
print 'after%d:' % i
# Return and close the function.
print ' ret void'
print '}'

View File

@@ -0,0 +1,82 @@
# Test normal conditional branches in cases where block alignments cause
# some branches to be out of range.
# RUN: python %s | llc -mtriple=s390x-linux-gnu -align-all-blocks=8 | FileCheck %s
# Construct:
#
# b0:
# conditional branch to end
# ...
# b<N>:
# conditional branch to end
# b<N+1>:
# conditional branch to b0
# ...
# b<2*N>:
# conditional branch to b0
# end:
#
# with N == 256 + 4. The -align-all-blocks=8 option ensures that all blocks
# are 256 bytes in size. The first 4 blocks and the last 4 blocks are then
# out of range.
#
# CHECK: c %r4, 0(%r3)
# CHECK: jge [[LABEL:\.L[^ ]*]]
# CHECK: c %r4, 4(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 8(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 12(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 16(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 20(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 24(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 28(%r3)
# CHECK: je [[LABEL]]
# ...lots of other blocks...
# CHECK: c %r4, 1004(%r3)
# CHECK: je [[LABEL:\.L[^ ]*]]
# CHECK: c %r4, 1008(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 1012(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 1016(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 1020(%r3)
# CHECK: je [[LABEL]]
# CHECK: c %r4, 1024(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 1028(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 1032(%r3)
# CHECK: jge [[LABEL]]
# CHECK: c %r4, 1036(%r3)
# CHECK: jge [[LABEL]]
# Total number of "b<i>" blocks to emit.
blocks = 256 + 4
# Function header and entry block, which jumps straight to b0.
print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {'
print 'entry:'
print ' br label %b0'
print ''
# The stored byte values follow a Fibonacci-style recurrence so that they
# differ from block to block.
a, b = 1, 1
for i in xrange(blocks):
a, b = b, a + b
value = a % 256
# Fall-through successor: the next block, or "end" after the last one.
next = 'b%d' % (i + 1) if i + 1 < blocks else 'end'
# Taken successor: blocks in the first half branch forward to "end",
# the rest branch back to b0.
other = 'end' if 2 * i < blocks else 'b0'
print 'b%d:' % i
print ' store volatile i8 %d, i8 *%%base' % value
print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next)
print ''
# Final block label, followed by the return and the end of the function.
print '%s:' % next
print ' ret void'
print '}'

View File

@@ -0,0 +1,10 @@
# Only files ending in .py are treated as tests in this directory.
config.suffixes = ['.py']
# These tests take on the order of seconds to run, so skip them unless
# running natively.
if config.root.host_arch not in ['SystemZ']:
config.unsupported = True
# Also mark the tests unsupported when SystemZ is not among the targets
# listed in targets_to_build.
targets = set(config.root.targets_to_build.split())
if not 'SystemZ' in targets:
config.unsupported = True

View File

@@ -6,7 +6,7 @@ define i32 @f1(i32 %x, i32 %y, i32 %op) {
; CHECK: f1: ; CHECK: f1:
; CHECK: ahi %r4, -1 ; CHECK: ahi %r4, -1
; CHECK: clfi %r4, 5 ; CHECK: clfi %r4, 5
; CHECK-NEXT: j{{g?}}g ; CHECK-NEXT: j{{g?}}h
; CHECK: llgfr [[OP64:%r[0-5]]], %r4 ; CHECK: llgfr [[OP64:%r[0-5]]], %r4
; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3 ; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
; CHECK: larl [[BASE:%r[1-5]]] ; CHECK: larl [[BASE:%r[1-5]]]

View File

@@ -202,7 +202,7 @@ define double @f14(double %a, double %b, i16 *%ptr) {
define double @f15(double %a, double %b, i16 *%ptr) { define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: f15: ; CHECK: f15:
; CHECK: chhsi 0(%r2), -32767 ; CHECK: chhsi 0(%r2), -32767
; CHECK-NEXT: j{{g?}}g ; CHECK-NEXT: j{{g?}}h
; CHECK: br %r14 ; CHECK: br %r14
%val = load i16 *%ptr %val = load i16 *%ptr
%ext = sext i16 %val to i32 %ext = sext i16 %val to i32

View File

@@ -202,7 +202,7 @@ define double @f14(double %a, double %b, i16 *%ptr) {
define double @f15(double %a, double %b, i16 *%ptr) { define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: f15: ; CHECK: f15:
; CHECK: chhsi 0(%r2), -32767 ; CHECK: chhsi 0(%r2), -32767
; CHECK-NEXT: j{{g?}}g ; CHECK-NEXT: j{{g?}}h
; CHECK: br %r14 ; CHECK: br %r14
%val = load i16 *%ptr %val = load i16 *%ptr
%ext = sext i16 %val to i64 %ext = sext i16 %val to i64