mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-23 05:29:23 +00:00
[Hexagon] Generate "extract" instructions more aggressively
Generate extract instructions (via intrinsics) before the DAG combiner folds shifts into unrecognizable forms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242163 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6e9f427cb2
commit
14e60218b6
@ -22,6 +22,7 @@ add_llvm_target(HexagonCodeGen
|
||||
HexagonExpandPredSpillCode.cpp
|
||||
HexagonFixupHwLoops.cpp
|
||||
HexagonFrameLowering.cpp
|
||||
HexagonGenExtract.cpp
|
||||
HexagonGenInsert.cpp
|
||||
HexagonHardwareLoops.cpp
|
||||
HexagonInstrInfo.cpp
|
||||
|
259
lib/Target/Hexagon/HexagonGenExtract.cpp
Normal file
259
lib/Target/Hexagon/HexagonGenExtract.cpp
Normal file
@ -0,0 +1,259 @@
|
||||
//===--- HexagonGenExtract.cpp --------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Debugging aid: upper bound on the number of "extract" conversions the pass
// performs. The bound is only honored when the option is actually given on
// the command line (see HexagonGenExtract::visitBlock).
static cl::opt<unsigned> ExtractCutoff(
    "extract-cutoff", cl::init(~0U), cl::Hidden,
    cl::desc("Cutoff for generating \"extract\""
             " instructions"));

// When set, do not generate extract instructions whose offset is 0.
// One purpose of "extract" is to place a sequence of bits in a register
// starting at offset 0, so that an "insert" can consume them. If the bits
// are already at offset 0, plain logical bit operations are preferable,
// because they can be merged into compound instructions while "extract"
// cannot.
static cl::opt<bool> NoSR0(
    "extract-nosr0", cl::init(true), cl::Hidden,
    cl::desc("No extract instruction with offset 0"));

// NOTE(review): this option is not referenced by any code visible in this
// file; confirm whether it was meant to gate the patterns in
// HexagonGenExtract::convert that have no "and".
static cl::opt<bool> NeedAnd(
    "extract-needand", cl::init(true), cl::Hidden,
    cl::desc("Require & in extract patterns"));
|
||||
|
||||
namespace llvm {
|
||||
void initializeHexagonGenExtractPass(PassRegistry&);
|
||||
FunctionPass *createHexagonGenExtract();
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
class HexagonGenExtract : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
|
||||
initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
virtual const char *getPassName() const override {
|
||||
return "Hexagon generate \"extract\" instructions";
|
||||
}
|
||||
virtual bool runOnFunction(Function &F) override;
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<MachineFunctionAnalysis>();
|
||||
FunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
private:
|
||||
bool visitBlock(BasicBlock *B);
|
||||
bool convert(Instruction *In);
|
||||
|
||||
unsigned ExtractCount;
|
||||
DominatorTree *DT;
|
||||
};
|
||||
|
||||
char HexagonGenExtract::ID = 0;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
|
||||
"\"extract\" instructions", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
|
||||
"\"extract\" instructions", false, false)
|
||||
|
||||
|
||||
/// Try to rewrite the shift/mask expression rooted at \p In as a call to the
/// hexagon_S2_extractu (32-bit operand) or hexagon_S2_extractup (64-bit
/// operand) intrinsic, followed by a left shift when the left-shift amount
/// is non-zero.
///
/// Recognized shapes, tried in this order (#sr, #sl and #m are constants):
///   (and (shl (lshr x, #sr), #sl), #m)
///   (and (shl (ashr x, #sr), #sl), #m)
///   (and (shl x, #sl), #m)              -- rejected while -extract-nosr0 is on
///   (and (lshr x, #sr), #m)
///   (and (ashr x, #sr), #m)
///   (shl (lshr x, #sr), #sl)
///   (shl (ashr x, #sr), #sl)
///
/// \returns true if every use of \p In was redirected to the new value.
/// \p In itself is not erased here.
bool HexagonGenExtract::convert(Instruction *In) {
  using namespace PatternMatch;
  Value *BF = nullptr;
  ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr;
  BasicBlock *BB = In->getParent();
  LLVMContext &Ctx = BB->getContext();
  bool LogicalSR;

  // (and (shl (lshr x, #sr), #sl), #m)
  LogicalSR = true;
  bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                                     m_ConstantInt(CSL)),
                               m_ConstantInt(CM)));

  if (!Match) {
    // (and (shl (ashr x, #sr), #sl), #m)
    LogicalSR = false;
    Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                                  m_ConstantInt(CSL)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (shl x, #sl), #m)
    LogicalSR = true;
    // There is no shift right in this shape: model it as a shift by 0.
    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
                            m_ConstantInt(CM)));
    // The extract would have offset 0, which NoSR0 asks us not to generate.
    if (Match && NoSR0)
      return false;
  }
  if (!Match) {
    // (and (lshr x, #sr), #m)
    LogicalSR = true;
    // There is no shift left in this shape: model it as a shift by 0.
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (ashr x, #sr), #m)
    LogicalSR = false;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // No explicit mask in this shape; one is synthesized further down.
    CM = nullptr;
    // (shl (lshr x, #sr), #sl)
    LogicalSR = true;
    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match) {
    CM = nullptr;
    // (shl (ashr x, #sr), #sl)
    LogicalSR = false;
    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match)
    return false;

  // Only 32- and 64-bit integers have a corresponding extract intrinsic.
  Type *Ty = BF->getType();
  if (!Ty->isIntegerTy())
    return false;
  unsigned BW = Ty->getPrimitiveSizeInBits();
  if (BW != 32 && BW != 64)
    return false;

  // Reject shift amounts that are not smaller than the bit width. The check
  // is done on the full 64-bit values, before narrowing: a narrowed value
  // could look in-range, and "BW - max(SL, SR)" below would wrap around for
  // an out-of-range amount.
  const uint64_t FullSR = CSR->getZExtValue();
  const uint64_t FullSL = CSL->getZExtValue();
  if (FullSR >= BW || FullSL >= BW)
    return false;
  uint32_t SR = static_cast<uint32_t>(FullSR);
  uint32_t SL = static_cast<uint32_t>(FullSL);

  if (!CM) {
    // If there was no and, and the shift left did not remove all potential
    // sign bits created by the shift right, then extractu cannot reproduce
    // this value.
    if (!LogicalSR && (SR > SL))
      return false;
    // Synthesize the mask that the two shifts imply on their own.
    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
    CM = ConstantInt::get(Ctx, A);
  }

  // CM is the shifted-left mask. Shift it back right to remove the zero
  // bits on least-significant positions.
  APInt M = CM->getValue().lshr(SL);
  uint32_t T = M.countTrailingOnes();

  // During the shifts some of the bits will be lost. Calculate how many
  // of the original value will remain after shift right and then left.
  uint32_t U = BW - std::max(SL, SR);
  // The width of the extracted field is the minimum of the original bits
  // that remain after the shifts and the number of contiguous 1s in the mask.
  uint32_t W = std::min(U, T);
  if (W == 0)
    return false;

  // Check if the extracted bits are contained within the mask that it is
  // and-ed with. The extract operation will copy these bits, and so the
  // mask cannot have any holes in it that would clear any of the bits of the
  // extracted field.
  if (!LogicalSR) {
    // If the shift right was arithmetic, it could have included some 1 bits.
    // It is still ok to generate extract, but only if the mask eliminates
    // those bits (i.e. M does not have any bits set beyond U).
    APInt C = APInt::getHighBitsSet(BW, BW-U);
    if (M.intersects(C) || !APIntOps::isMask(W, M))
      return false;
  } else {
    // Check if M starts with a contiguous sequence of W times 1 bits. Get
    // the low U bits of M (which eliminates the 0 bits shifted in on the
    // left), and check if the result is APInt's "mask":
    if (!APIntOps::isMask(W, M.getLoBits(U)))
      return false;
  }

  // Emit the replacement immediately before In: extract W bits starting at
  // bit SR, then shift the field back to its position if needed.
  IRBuilder<> IRB(BB, In);
  Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
                                   : Intrinsic::hexagon_S2_extractup;
  Module *Mod = BB->getParent()->getParent();
  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
  Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
  if (SL != 0)
    NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
  In->replaceAllUsesWith(NewIn);
  return true;
}
|
||||
|
||||
|
||||
/// Run convert() over \p B and over every block below it in the dominator
/// tree. Dominated blocks are processed first; within a block, instructions
/// are visited from the last one to the first.
///
/// \returns true if any instruction in \p B or in a dominated block was
/// converted.
bool HexagonGenExtract::visitBlock(BasicBlock *B) {
  bool Changed = false;

  // Depth-first, bottom-up traversal. Changes made while processing the
  // dominated blocks must be reported to the caller as well.
  DomTreeNode *DTN = DT->getNode(B);
  typedef GraphTraits<DomTreeNode*> GTN;
  typedef GTN::ChildIteratorType Iter;
  for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
    Changed |= visitBlock((*I)->getBlock());

  // Allow limiting the number of generated extracts for debugging purposes.
  // The limit only applies when the option was given on the command line.
  bool HasCutoff = ExtractCutoff.getPosition();
  unsigned Cutoff = ExtractCutoff;

  // std::prev(B->end()) below is only valid for a non-empty block.
  if (B->empty())
    return Changed;

  BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
  while (true) {
    if (HasCutoff && (ExtractCount >= Cutoff))
      return Changed;
    bool Last = (I == Begin);
    // Pick the next instruction to visit before calling convert(), so that
    // instructions convert() inserts in front of In are not visited.
    if (!Last)
      NextI = std::prev(I);
    Instruction *In = &*I;
    bool Done = convert(In);
    if (HasCutoff && Done)
      ExtractCount++;
    Changed |= Done;
    if (Last)
      break;
    I = NextI;
  }
  return Changed;
}
|
||||
|
||||
|
||||
/// Pass entry point: fetch the dominator tree for \p F and process the
/// function starting from its entry block.
///
/// \returns whatever visitBlock() reports for the entry block.
bool HexagonGenExtract::runOnFunction(Function &F) {
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  // Traverse the function bottom-up, to see super-expressions before their
  // sub-expressions.
  return visitBlock(GraphTraits<Function*>::getEntryNode(&F));
}
|
||||
|
||||
|
||||
/// Factory for the "extract" generation pass. Returns a newly allocated
/// HexagonGenExtract instance.
FunctionPass *llvm::createHexagonGenExtract() {
  FunctionPass *Pass = new HexagonGenExtract();
  return Pass;
}
|
@ -43,6 +43,8 @@ static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
|
||||
static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
|
||||
cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions"));
|
||||
|
||||
static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
|
||||
cl::Hidden, cl::desc("Generate \"extract\" instructions"));
|
||||
|
||||
/// HexagonTargetMachineModule - Note that this is used on hosts that
|
||||
/// cannot link in a library unless there are references into the
|
||||
@ -66,24 +68,22 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
|
||||
createVLIWMachineSched);
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createHexagonCommonGEP();
|
||||
FunctionPass *createHexagonExpandCondsets();
|
||||
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
|
||||
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
|
||||
FunctionPass *createHexagonCFGOptimizer();
|
||||
|
||||
FunctionPass *createHexagonSplitConst32AndConst64();
|
||||
FunctionPass *createHexagonCommonGEP();
|
||||
FunctionPass *createHexagonCopyToCombine();
|
||||
FunctionPass *createHexagonExpandCondsets();
|
||||
FunctionPass *createHexagonExpandPredSpillCode();
|
||||
FunctionPass *createHexagonFixupHwLoops();
|
||||
FunctionPass *createHexagonGenExtract();
|
||||
FunctionPass *createHexagonGenInsert();
|
||||
FunctionPass *createHexagonHardwareLoops();
|
||||
FunctionPass *createHexagonPeephole();
|
||||
FunctionPass *createHexagonFixupHwLoops();
|
||||
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createHexagonNewValueJump();
|
||||
FunctionPass *createHexagonCopyToCombine();
|
||||
FunctionPass *createHexagonPacketizer();
|
||||
FunctionPass *createHexagonNewValueJump();
|
||||
FunctionPass *createHexagonPeephole();
|
||||
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
|
||||
FunctionPass *createHexagonSplitConst32AndConst64();
|
||||
} // end namespace llvm;
|
||||
|
||||
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
|
||||
@ -147,8 +147,13 @@ void HexagonPassConfig::addIRPasses() {
|
||||
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
|
||||
|
||||
addPass(createAtomicExpandPass(TM));
|
||||
if (!NoOpt && EnableCommGEP)
|
||||
addPass(createHexagonCommonGEP());
|
||||
if (!NoOpt) {
|
||||
if (EnableCommGEP)
|
||||
addPass(createHexagonCommonGEP());
|
||||
// Replace certain combinations of shifts and ands with extracts.
|
||||
if (EnableGenExtract)
|
||||
addPass(createHexagonGenExtract());
|
||||
}
|
||||
}
|
||||
|
||||
bool HexagonPassConfig::addInstSelector() {
|
||||
|
76
test/CodeGen/Hexagon/extract-basic.ll
Normal file
76
test/CodeGen/Hexagon/extract-basic.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; CHECK-DAG: extractu(r{{[0-9]*}}, #3, #4)
|
||||
; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #7)
|
||||
; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #16)
|
||||
|
||||
; C source:
|
||||
; typedef struct {
|
||||
; unsigned x1:3;
|
||||
; unsigned x2:7;
|
||||
; unsigned x3:8;
|
||||
; unsigned x4:12;
|
||||
; unsigned x5:2;
|
||||
; } structx_t;
|
||||
;
|
||||
; typedef struct {
|
||||
; unsigned y1:4;
|
||||
; unsigned y2:3;
|
||||
; unsigned y3:9;
|
||||
; unsigned y4:8;
|
||||
; unsigned y5:8;
|
||||
; } structy_t;
|
||||
;
|
||||
; void foo(structx_t *px, structy_t *py) {
|
||||
; px->x1 = py->y1;
|
||||
; px->x2 = py->y2;
|
||||
; px->x3 = py->y3;
|
||||
; px->x4 = py->y4;
|
||||
; px->x5 = py->y5;
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
%struct.structx_t = type { i8, i8, i8, i8 }
|
||||
%struct.structy_t = type { i8, i8, i8, i8 }
|
||||
|
||||
define void @foo(%struct.structx_t* nocapture %px, %struct.structy_t* nocapture %py) nounwind {
|
||||
entry:
|
||||
%0 = bitcast %struct.structy_t* %py to i32*
|
||||
%1 = load i32, i32* %0, align 4
|
||||
%bf.value = and i32 %1, 7
|
||||
%2 = bitcast %struct.structx_t* %px to i32*
|
||||
%3 = load i32, i32* %2, align 4
|
||||
%4 = and i32 %3, -8
|
||||
%5 = or i32 %4, %bf.value
|
||||
store i32 %5, i32* %2, align 4
|
||||
%6 = load i32, i32* %0, align 4
|
||||
%7 = lshr i32 %6, 4
|
||||
%bf.clear1 = shl nuw nsw i32 %7, 3
|
||||
%8 = and i32 %bf.clear1, 56
|
||||
%9 = and i32 %5, -1017
|
||||
%10 = or i32 %8, %9
|
||||
store i32 %10, i32* %2, align 4
|
||||
%11 = load i32, i32* %0, align 4
|
||||
%12 = lshr i32 %11, 7
|
||||
%bf.value4 = shl i32 %12, 10
|
||||
%13 = and i32 %bf.value4, 261120
|
||||
%14 = and i32 %10, -262081
|
||||
%15 = or i32 %14, %13
|
||||
store i32 %15, i32* %2, align 4
|
||||
%16 = load i32, i32* %0, align 4
|
||||
%17 = lshr i32 %16, 16
|
||||
%bf.clear5 = shl i32 %17, 18
|
||||
%18 = and i32 %bf.clear5, 66846720
|
||||
%19 = and i32 %15, -1073480641
|
||||
%20 = or i32 %19, %18
|
||||
store i32 %20, i32* %2, align 4
|
||||
%21 = load i32, i32* %0, align 4
|
||||
%22 = lshr i32 %21, 24
|
||||
%23 = shl i32 %22, 30
|
||||
%24 = and i32 %20, 67107903
|
||||
%25 = or i32 %24, %23
|
||||
store i32 %25, i32* %2, align 4
|
||||
ret void
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -hexagon-extract=0 < %s | FileCheck %s
|
||||
; Check that we generate fused logical and with shift instruction.
|
||||
; Disable "extract" generation, since it may eliminate the and/lsr.
|
||||
|
||||
; CHECK: r{{[0-9]+}} = and(#15, lsr(r{{[0-9]+}}, #{{[0-9]+}})
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user