mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-05 14:34:55 +00:00
R600/SI: Only select SALU instructions in the entry or exit block
SALU instructions ignore control flow, so it is not always safe to use them within branches. This is a partial solution to this problem until we can come up with something better. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207590 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
19a970b2da
commit
2a90e446c0
@ -16,9 +16,11 @@
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "SIISelLowering.h"
|
||||
#include "llvm/CodeGen/FunctionLoweringInfo.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -70,6 +72,11 @@ private:
|
||||
bool isLocalLoad(const LoadSDNode *N) const;
|
||||
bool isRegionLoad(const LoadSDNode *N) const;
|
||||
|
||||
/// \returns True if the current basic block being selected is at control
|
||||
/// flow depth 0, meaning that the current block dominates the
|
||||
/// exit block.
|
||||
bool isCFDepth0() const;
|
||||
|
||||
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
|
||||
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
|
||||
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
|
||||
@ -565,6 +572,14 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Approximates "control flow depth 0" for the block currently being
/// selected: returns true only when the IR basic block behind FuncInfo->MBB
/// is the first or the last basic block of the enclosing function.
bool AMDGPUDAGToDAGISel::isCFDepth0() const {
|
||||
// FIXME: Figure out a way to use DominatorTree analysis here.
// NOTE(review): the last block in the function's block list is presumably
// standing in for the exit block here; confirm that holds for every input,
// since block order alone does not establish dominance.
|
||||
const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
|
||||
const Function *Fn = FuncInfo->Fn;
|
||||
// Pointer identity against the function's first and last basic blocks.
return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
|
||||
}
|
||||
|
||||
|
||||
/// \returns The fixed, human-readable name string for this selector.
const char *AMDGPUDAGToDAGISel::getPassName() const {
|
||||
return "AMDGPU DAG->DAG Pattern Instruction Selection";
|
||||
}
|
||||
|
@ -32,14 +32,16 @@ def isSI : Predicate<"Subtarget.getGeneration() "
|
||||
def isCI : Predicate<"Subtarget.getGeneration() "
|
||||
">= AMDGPUSubtarget::SEA_ISLANDS">;
|
||||
|
||||
def WAIT_FLAG : InstFlag<"printWaitFlag">;
|
||||
def isCFDepth0 : Predicate<"isCFDepth0()">;
|
||||
|
||||
let Predicates = [isSI] in {
|
||||
def WAIT_FLAG : InstFlag<"printWaitFlag">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SMRD Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI, isCFDepth0] in {
|
||||
|
||||
let mayLoad = 1 in {
|
||||
|
||||
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
|
||||
@ -76,10 +78,14 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
|
||||
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
|
||||
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
|
||||
|
||||
} // let Predicates = [isSI, isCFDepth0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI, isCFDepth0] in {
|
||||
|
||||
let neverHasSideEffects = 1 in {
|
||||
|
||||
let isMoveImm = 1 in {
|
||||
@ -152,10 +158,14 @@ def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
|
||||
def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
|
||||
def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
|
||||
|
||||
} // let Predicates = [isSI, isCFDepth0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOP2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI, isCFDepth0] in {
|
||||
|
||||
let Defs = [SCC] in { // Carry out goes to SCC
|
||||
let isCommutable = 1 in {
|
||||
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
|
||||
@ -209,11 +219,6 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
|
||||
[(set i64:$dst, (and i64:$src0, i64:$src1))]
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (and i1:$src0, i1:$src1)),
|
||||
(S_AND_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
|
||||
[(set i32:$dst, (or i32:$src0, i32:$src1))]
|
||||
>;
|
||||
@ -222,17 +227,12 @@ def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
|
||||
[(set i64:$dst, (or i64:$src0, i64:$src1))]
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (or i1:$src0, i1:$src1)),
|
||||
(S_OR_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
|
||||
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
|
||||
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
|
||||
[(set i64:$dst, (xor i64:$src0, i64:$src1))]
|
||||
>;
|
||||
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
|
||||
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
|
||||
@ -279,10 +279,14 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
|
||||
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
|
||||
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
|
||||
|
||||
} // let Predicates = [isSI, isCFDepth0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOPC Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI, isCFDepth0] in {
|
||||
|
||||
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
|
||||
def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
|
||||
def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
|
||||
@ -301,10 +305,14 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
|
||||
////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
|
||||
//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
|
||||
|
||||
} // let Predicates = [isSI, isCFDepth0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOPK Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI, isCFDepth0] in {
|
||||
|
||||
def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
|
||||
def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
|
||||
|
||||
@ -353,10 +361,14 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
|
||||
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
|
||||
//def EXP : EXP_ <0x00000000, "EXP", []>;
|
||||
|
||||
} // let Predicates = [isSI, isCFDepth0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOPP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isSI] in {
|
||||
|
||||
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
|
||||
|
||||
let isTerminator = 1 in {
|
||||
@ -449,6 +461,10 @@ let Uses = [EXEC] in {
|
||||
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
|
||||
} // End hasSideEffects
|
||||
|
||||
} // let Predicates = [isSI]
|
||||
|
||||
let Predicates = [isSI] in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOPC Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1173,27 +1189,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
|
||||
|
||||
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
|
||||
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
|
||||
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
|
||||
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
|
||||
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
|
||||
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
|
||||
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
|
||||
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
|
||||
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
|
||||
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
|
||||
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
|
||||
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
|
||||
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
|
||||
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
|
||||
|
||||
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
|
||||
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
|
||||
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
|
||||
|
||||
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
|
||||
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
|
||||
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
|
||||
|
||||
let hasPostISelHook = 1 in {
|
||||
|
||||
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
|
||||
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
|
||||
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
}
|
||||
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
|
||||
|
||||
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
|
||||
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
|
||||
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
|
||||
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
|
||||
[(set i32:$dst, (and i32:$src0, i32:$src1))]>;
|
||||
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
|
||||
[(set i32:$dst, (or i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
|
||||
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
} // End isCommutable = 1
|
||||
|
||||
@ -1209,14 +1241,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
|
||||
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
||||
// No patterns so that the scalar instructions are always selected.
|
||||
// The scalar versions will be replaced with vector when needed later.
|
||||
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
|
||||
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
|
||||
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
|
||||
[(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
|
||||
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
|
||||
[(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
|
||||
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
|
||||
"V_SUB_I32">;
|
||||
|
||||
let Uses = [VCC] in { // Carry-in comes from VCC
|
||||
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
|
||||
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
|
||||
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
|
||||
[(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
|
||||
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
|
||||
[(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
|
||||
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
|
||||
"V_SUBB_U32">;
|
||||
} // End Uses = [VCC]
|
||||
@ -1592,6 +1628,45 @@ def : Pat <
|
||||
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOP2 Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
(i1 (and i1:$src0, i1:$src1)),
|
||||
(S_AND_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (or i1:$src0, i1:$src1)),
|
||||
(S_OR_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (xor i1:$src0, i1:$src1)),
|
||||
(S_XOR_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
(or i64:$src0, i64:$src1),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
|
||||
(V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
|
||||
(EXTRACT_SUBREG i64:$src1, sub0)), sub0),
|
||||
(V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
|
||||
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
|
||||
>;
|
||||
|
||||
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
|
||||
(sext_inreg i32:$src0, vt),
|
||||
(V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
|
||||
>;
|
||||
|
||||
def : SextInReg <i8, 24>;
|
||||
def : SextInReg <i16, 16>;
|
||||
|
||||
/********** ======================= **********/
|
||||
/********** Image sampling patterns **********/
|
||||
|
27
test/CodeGen/R600/sgpr-control-flow.ll
Normal file
27
test/CodeGen/R600/sgpr-control-flow.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
;
|
||||
;
|
||||
; Most SALU instructions ignore control flow, so we need to make sure
|
||||
; they don't overwrite values from other blocks.
|
||||
|
||||
; SI-NOT: S_ADD
|
||||
|
||||
; Diamond-shaped control flow: %entry branches on (%a == 0) to %if or %else,
; each arm performs one i32 add, and the two results are merged by a phi in
; %endif before a final add and a store to %out.
define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %a, 0
|
||||
br i1 %0, label %if, label %else
|
||||
|
||||
if:
|
||||
; Add inside the taken arm of the branch.
%1 = add i32 %b, %c
|
||||
br label %endif
|
||||
|
||||
else:
|
||||
; Add inside the other arm of the branch.
%2 = add i32 %d, %e
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
; Merge the per-arm results, then add %a in the join block.
%3 = phi i32 [%1, %if], [%2, %else]
|
||||
%4 = add i32 %3, %a
|
||||
store i32 %4, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user