Mirror of https://github.com/c64scene-ar/llvm-6502.git — synced 2025-01-28 06:32:09 +00:00
R600/SI: cleanup literal handling v3
Seems to be a lot simpler, and also paves the way for further improvements.

v2: rebased on master, use 0 in BUFFER_LOAD_FORMAT_XYZW, use VGPR0 in dummy EXP,
    avoid compiler warning, break after encoding the first literal.
v3: correctly use V_ADD_F32_e64

This is a candidate for the stable branch.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175354 91177308-0d34-0410-b5e6-96231b3b80d8
parent 8e4eebcecf
commit e25e490793
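For orientation, here is a small self-contained sketch (not the in-tree code) of the operand-encoding rule that the new SIMCCodeEmitter::getLitEncoding below implements: integers 0..64 and -16..-1 plus a handful of float constants get an inline encoding in the source-operand field, and anything else encodes as 255, which tells the hardware that a 32-bit literal dword follows the instruction. The 0x80 immediates used for the V_ADD_F32_e64 SRC1/SRC2 operands further down are the inline encoding of the constant zero (128). Function and variable names here are illustrative only.

// litEncoding mirrors the getLitEncoding logic from the diff below, written as
// a free function so it can be compiled and tested in isolation.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static uint32_t litEncoding(int32_t bits) {
  union { int32_t I; float F; } Imm;
  Imm.I = bits;                          // raw 32-bit pattern of the constant
  if (Imm.I >= 0 && Imm.I <= 64)
    return 128 + Imm.I;                  // inline integers 0..64
  if (Imm.I >= -16 && Imm.I <= -1)
    return 192 + std::abs(Imm.I);        // inline integers -1..-16
  if (Imm.F == 0.5f)  return 240;        // inline float constants
  if (Imm.F == -0.5f) return 241;
  if (Imm.F == 1.0f)  return 242;
  if (Imm.F == -1.0f) return 243;
  if (Imm.F == 2.0f)  return 244;
  if (Imm.F == -2.0f) return 245;
  if (Imm.F == 4.0f)  return 246;
  if (Imm.F == -4.0f) return 247;
  return 255;                            // no inline form: literal dword follows
}

int main() {
  // 0 -> 128 (0x80), -1 -> 193, 42 -> 170, 1000 has no inline form -> 255
  std::printf("%u %u %u %u\n",
              litEncoding(0), litEncoding(-1), litEncoding(42), litEncoding(1000));
  return 0;
}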
@@ -30,7 +30,6 @@ FunctionPass *createSIAnnotateControlFlowPass();
 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
 FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
 FunctionPass *createSIInsertWaits(TargetMachine &tm);

 // Passes common to R600 and SI
@@ -91,8 +91,6 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
       switch (reg) {
       default: break;
       case AMDGPU::EXEC:
-      case AMDGPU::SI_LITERAL_CONSTANT:
-      case AMDGPU::SREG_LIT_0:
       case AMDGPU::M0:
         continue;
       }
@@ -145,7 +145,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
     addPass(&FinalizeMachineBundlesID);
     addPass(createR600LowerConstCopy(*TM));
   } else {
-    addPass(createSILowerLiteralConstantsPass(*TM));
     addPass(createSILowerControlFlowPass(*TM));
   }

@@ -27,6 +27,13 @@
 using namespace llvm;

 namespace {
+
+/// \brief Helper type used in encoding
+typedef union {
+  int32_t I;
+  float F;
+} IntFloatUnion;
+
 class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
   SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
   void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
@@ -35,6 +42,15 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
   const MCSubtargetInfo &STI;
   MCContext &Ctx;

+  /// \brief Encode a sequence of registers with the correct alignment.
+  unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+  /// \brief Can this operand also contain immediate values?
+  bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+  /// \brief Encode an fp or int literal
+  uint32_t getLitEncoding(const MCOperand &MO) const;
+
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   const MCSubtargetInfo &sti, MCContext &ctx)
@@ -50,11 +66,6 @@ public:
   virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                                      SmallVectorImpl<MCFixup> &Fixups) const;

-public:
-
-  /// \brief Encode a sequence of registers with the correct alignment.
-  unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-
   /// \brief Encoding for when 2 consecutive registers are used
   virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
                                    SmallVectorImpl<MCFixup> &Fixup) const;
@@ -73,39 +84,131 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
   return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
 }

+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+                                   unsigned OpNo) const {
+
+  unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
+  return (AMDGPU::SSrc_32RegClassID == RegClass) ||
+         (AMDGPU::SSrc_64RegClassID == RegClass) ||
+         (AMDGPU::VSrc_32RegClassID == RegClass) ||
+         (AMDGPU::VSrc_64RegClassID == RegClass);
+}
+
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+
+  IntFloatUnion Imm;
+  if (MO.isImm())
+    Imm.I = MO.getImm();
+  else if (MO.isFPImm())
+    Imm.F = MO.getFPImm();
+  else
+    return ~0;
+
+  if (Imm.I >= 0 && Imm.I <= 64)
+    return 128 + Imm.I;
+
+  if (Imm.I >= -16 && Imm.I <= -1)
+    return 192 + abs(Imm.I);
+
+  if (Imm.F == 0.5f)
+    return 240;
+
+  if (Imm.F == -0.5f)
+    return 241;
+
+  if (Imm.F == 1.0f)
+    return 242;
+
+  if (Imm.F == -1.0f)
+    return 243;
+
+  if (Imm.F == 2.0f)
+    return 244;
+
+  if (Imm.F == -2.0f)
+    return 245;
+
+  if (Imm.F == 4.0f)
+    return 246;
+
+  if (Imm.F == -4.0f)
+    return 247;
+
+  return 255;
+}
+
 void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
+
   uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
-  unsigned bytes = MCII.get(MI.getOpcode()).getSize();
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  unsigned bytes = Desc.getSize();
+
   for (unsigned i = 0; i < bytes; i++) {
     OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
   }
+
+  if (bytes > 4)
+    return;
+
+  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+  for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+    // Check if this operand should be encoded as [SV]Src
+    if (!isSrcOperand(Desc, i))
+      continue;
+
+    // Is this operand a literal immediate?
+    const MCOperand &Op = MI.getOperand(i);
+    if (getLitEncoding(Op) != 255)
+      continue;
+
+    // Yes! Encode it
+    IntFloatUnion Imm;
+    if (Op.isImm())
+      Imm.I = Op.getImm();
+    else
+      Imm.F = Op.getFPImm();
+
+    for (unsigned j = 0; j < 4; j++) {
+      OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+    }
+
+    // Only one literal value allowed
+    break;
+  }
 }

 uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                             const MCOperand &MO,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
-  if (MO.isReg()) {
+  if (MO.isReg())
     return MRI.getEncodingValue(MO.getReg());
-  } else if (MO.isImm()) {
-    return MO.getImm();
-  } else if (MO.isFPImm()) {
-    // XXX: Not all instructions can use inline literals
-    // XXX: We should make sure this is a 32-bit constant
-    union {
-      float F;
-      uint32_t I;
-    } Imm;
-    Imm.F = MO.getFPImm();
-    return Imm.I;
-  } else if (MO.isExpr()) {
+
+  if (MO.isExpr()) {
     const MCExpr *Expr = MO.getExpr();
     MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
     Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
     return 0;
-  } else{
-    llvm_unreachable("Encoding of this operand type is not supported yet.");
   }
+
+  // Figure out the operand number, needed for isSrcOperand check
+  unsigned OpNo = 0;
+  for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+    if (&MO == &MI.getOperand(OpNo))
+      break;
+  }
+
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  if (isSrcOperand(Desc, OpNo)) {
+    uint32_t Enc = getLitEncoding(MO);
+    if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+      return Enc;
+
+  } else if (MO.isImm())
+    return MO.getImm();
+
+  llvm_unreachable("Encoding of this operand type is not supported yet.");
   return 0;
 }

@@ -118,6 +221,7 @@ unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
   unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg());
   return (regCode & 0xff) >> shift;
 }
+
 unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
                                           unsigned OpNo ,
                                         SmallVectorImpl<MCFixup> &Fixup) const {
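To make the literal handling above concrete, here is a hedged, self-contained illustration (not repository code) of the byte stream that EncodeInstruction produces for a 4-byte instruction with one non-inline constant: the instruction dword is written first, then exactly one 32-bit literal dword — the loop breaks after the first literal, as the commit message notes. Both values below are made-up example bit patterns.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  uint64_t Encoding = 0x7E0002FF;  // made-up 32-bit instruction word whose
                                   // src field is 255 ("literal follows")
  uint32_t Literal  = 0x3E22F983;  // made-up literal constant
  std::vector<uint8_t> Stream;

  for (unsigned i = 0; i < 4; ++i)               // instruction bytes, LSB first
    Stream.push_back(uint8_t((Encoding >> (8 * i)) & 0xff));
  for (unsigned j = 0; j < 4; ++j)               // one trailing literal dword
    Stream.push_back(uint8_t((Literal >> (8 * j)) & 0xff));

  for (uint8_t B : Stream)
    std::printf("%02x ", B);
  std::printf("\n");               // prints: ff 02 00 7e 83 f9 22 3e
  return 0;
}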
@@ -77,8 +77,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
             .addOperand(MI->getOperand(0))
             .addOperand(MI->getOperand(1))
-            .addReg(AMDGPU::SREG_LIT_0)
-            .addReg(AMDGPU::SREG_LIT_0)
+            .addImm(0x80) // SRC1
+            .addImm(0x80) // SRC2
             .addImm(0) // ABS
             .addImm(1) // CLAMP
             .addImm(0) // OMOD
@@ -90,8 +90,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
             .addOperand(MI->getOperand(0))
             .addOperand(MI->getOperand(1))
-            .addReg(AMDGPU::SREG_LIT_0)
-            .addReg(AMDGPU::SREG_LIT_0)
+            .addImm(0x80) // SRC1
+            .addImm(0x80) // SRC2
             .addImm(1) // ABS
             .addImm(0) // CLAMP
             .addImm(0) // OMOD
@@ -103,8 +103,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
             .addOperand(MI->getOperand(0))
             .addOperand(MI->getOperand(1))
-            .addReg(AMDGPU::SREG_LIT_0)
-            .addReg(AMDGPU::SREG_LIT_0)
+            .addImm(0x80) // SRC1
+            .addImm(0x80) // SRC2
             .addImm(0) // ABS
             .addImm(0) // CLAMP
             .addImm(0) // OMOD
@@ -176,7 +176,7 @@ void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
   BuildMI(BB, I, BB.findDebugLoc(I),
           TII->get(AMDGPU::V_CMP_GT_F32_e32),
           VCC)
-          .addReg(AMDGPU::SREG_LIT_0)
+          .addImm(0)
           .addOperand(MI->getOperand(1));

   BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
@@ -68,7 +68,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

 MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                            int64_t Imm) const {
-  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
+  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
   MachineInstrBuilder MIB(*MF, MI);
   MIB.addReg(DstReg, RegState::Define);
   MIB.addImm(Imm);
@@ -84,9 +84,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const {
   case AMDGPU::S_MOV_B64:
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::V_MOV_B32_e64:
-  case AMDGPU::V_MOV_IMM_F32:
-  case AMDGPU::V_MOV_IMM_I32:
-  case AMDGPU::S_MOV_IMM_I32:
     return true;
   }
 }
@@ -1018,45 +1018,6 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
 //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
 def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;

-class V_MOV_IMM <ValueType type, Operand immType, SDNode immNode> : InstSI <
-  (outs VReg_32:$dst),
-  (ins immType:$src0),
-  "V_MOV_IMM",
-  [(set VReg_32:$dst, (type immNode:$src0))]
->;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-def V_MOV_IMM_I32 : V_MOV_IMM<i32, i32imm, imm>;
-def V_MOV_IMM_F32 : V_MOV_IMM<f32, f32imm, fpimm>;
-
-def S_MOV_IMM_I32 : InstSI <
-  (outs SReg_32:$dst),
-  (ins i32imm:$src0),
-  "S_MOV_IMM_I32",
-  [(set SReg_32:$dst, (imm:$src0))]
->;
-
-} // End isCodeGenOnly, isPseudo = 1
-
-// i64 immediates aren't supported in hardware, split it into two 32bit values
-def : Pat <
-  (i64 imm:$imm),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_MOV_IMM_I32 (LO32 imm:$imm)), sub0),
-    (S_MOV_IMM_I32 (HI32 imm:$imm)), sub1)
->;
-
-class SI_LOAD_LITERAL<Operand ImmType> :
-    Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
-
-  bits<32> imm;
-  let Inst{31-0} = imm;
-}
-
-def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
-def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
-
 let isCodeGenOnly = 1, isPseudo = 1 in {

 def SET_M0 : InstSI <
@@ -1173,7 +1134,7 @@ def SI_KILL : InstSI <

 def : Pat <
   (int_AMDGPU_kilp),
-  (SI_KILL (V_MOV_IMM_I32 0xbf800000))
+  (SI_KILL (V_MOV_B32_e32 0xbf800000))
 >;

 /* int_SI_vs_load_input */
@@ -1182,7 +1143,7 @@ def : Pat<
                         VReg_32:$buf_idx_vgpr),
   (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
                            VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
-                           0, 0, (i32 SREG_LIT_0))
+                           0, 0, 0)
 >;

 /* int_SI_export */
@@ -1319,6 +1280,38 @@ def : Pat <
   (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
 >;

+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+  (i32 imm:$imm),
+  (V_MOV_B32_e32 imm:$imm)
+>;
+
+def : Pat <
+  (f32 fpimm:$imm),
+  (V_MOV_B32_e32 fpimm:$imm)
+>;
+
+def : Pat <
+  (i32 imm:$imm),
+  (S_MOV_B32 imm:$imm)
+>;
+
+def : Pat <
+  (f32 fpimm:$imm),
+  (S_MOV_B32 fpimm:$imm)
+>;
+
+// i64 immediates aren't supported in hardware, split it into two 32bit values
+def : Pat <
+  (i64 imm:$imm),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
+    (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
+>;
+
 /********** ===================== **********/
 /********** Interpolation Paterns **********/
 /********** ===================== **********/
@@ -1397,12 +1390,12 @@ def : Pat<

 def : Pat <
   (fcos VSrc_32:$src0),
-  (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV)))
+  (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
 >;

 def : Pat <
   (fsin VSrc_32:$src0),
-  (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV)))
+  (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
 >;

 def : Pat <
|
|||||||
// 2. Offset loaded in an 32bit SGPR
|
// 2. Offset loaded in an 32bit SGPR
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
|
(constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
|
||||||
(vt (Instr_SGPR SReg_64:$sbase, (S_MOV_IMM_I32 imm:$offset)))
|
(vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// 3. No offset at all
|
// 3. No offset at all
|
||||||
|
@@ -158,10 +158,10 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
           .addImm(0)
           .addImm(1)
           .addImm(1)
-          .addReg(AMDGPU::SREG_LIT_0)
-          .addReg(AMDGPU::SREG_LIT_0)
-          .addReg(AMDGPU::SREG_LIT_0)
-          .addReg(AMDGPU::SREG_LIT_0);
+          .addReg(AMDGPU::VGPR0)
+          .addReg(AMDGPU::VGPR0)
+          .addReg(AMDGPU::VGPR0)
+          .addReg(AMDGPU::VGPR0);

   // ... and terminate wavefront
   BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
@@ -296,7 +296,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {

   // Clear this pixel from the exec mask if the operand is negative
   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
-          .addReg(AMDGPU::SREG_LIT_0)
+          .addImm(0)
           .addOperand(MI.getOperand(0));

   MI.eraseFromParent();
@@ -1,107 +0,0 @@
-//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass performs the following transformation on instructions with
-///  literal constants:
-///
-/// %VGPR0 = V_MOV_IMM_I32 1
-///
-/// becomes:
-///
-/// BUNDLE
-///   * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
-///   * SI_LOAD_LITERAL 1
-///
-/// The resulting sequence matches exactly how the hardware handles immediate
-/// operands, so this transformation greatly simplifies the code generator.
-///
-/// Only the *_MOV_IMM_* support immediate operands at the moment, but when
-/// support for immediate operands is added to other instructions, they
-/// will be lowered here as well.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerLiteralConstantsPass : public MachineFunctionPass {
-
-private:
-  static char ID;
-  const TargetInstrInfo *TII;
-
-public:
-  SILowerLiteralConstantsPass(TargetMachine &tm) :
-      MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  const char *getPassName() const {
-    return "SI Lower literal constants pass";
-  }
-};
-
-} // End anonymous namespace
-
-char SILowerLiteralConstantsPass::ID = 0;
-
-FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
-  return new SILowerLiteralConstantsPass(tm);
-}
-
-bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-         I != MBB.end(); I = Next) {
-      Next = llvm::next(I);
-      MachineInstr &MI = *I;
-      switch (MI.getOpcode()) {
-      default: break;
-      case AMDGPU::S_MOV_IMM_I32:
-      case AMDGPU::V_MOV_IMM_F32:
-      case AMDGPU::V_MOV_IMM_I32: {
-        unsigned MovOpcode;
-        unsigned LoadLiteralOpcode;
-        MachineOperand LiteralOp = MI.getOperand(1);
-        if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
-          MovOpcode = AMDGPU::V_MOV_B32_e32;
-        } else {
-          MovOpcode = AMDGPU::S_MOV_B32;
-        }
-        if (LiteralOp.isImm()) {
-          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
-        } else {
-          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
-        }
-        MIBundleBuilder Bundle(MBB, I);
-        Bundle
-          .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode),
-                          MI.getOperand(0).getReg())
-                  .addReg(AMDGPU::SI_LITERAL_CONSTANT))
-          .append(BuildMI(MF, MBB.findDebugLoc(I),
-                          TII->get(LoadLiteralOpcode))
-                  .addOperand(MI.getOperand(1)));
-        llvm::finalizeBundle(MBB, Bundle.begin());
-        MI.eraseFromParent();
-        break;
-      }
-      }
-    }
-  }
-  return false;
-}
@@ -22,8 +22,6 @@ def EXEC_LO : SIReg <"EXEC LO", 126>;
 def EXEC_HI : SIReg <"EXEC HI", 127>;
 def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
 def SCC : SIReg<"SCC", 253>;
-def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
-def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>;
 def M0 : SIReg <"M0", 124>;

 //Interpolation registers
@@ -136,7 +134,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,

 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
+  (add SGPR_32, M0, EXEC_LO, EXEC_HI)
 >;

 def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>;