R600/SI: Only select SALU instructions in the entry or exit block

SALU instructions ignore control flow, so it is not always safe to use
them within branches.  This is a partial solution to this problem
until we can come up with something better.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207590 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-04-29 23:12:48 +00:00
parent 19a970b2da
commit 2a90e446c0
3 changed files with 144 additions and 27 deletions

View File

@ -16,9 +16,11 @@
#include "AMDGPURegisterInfo.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
using namespace llvm;
@ -70,6 +72,11 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
/// \returns True if the current basic block being selected is at control
/// flow depth 0. Meaning that the current block dominates the
// exit block.
bool isCFDepth0() const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@ -565,6 +572,14 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
return false;
}
bool AMDGPUDAGToDAGISel::isCFDepth0() const {
// FIXME: Figure out a way to use DominatorTree analysis here.
const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
const Function *Fn = FuncInfo->Fn;
return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

View File

@ -32,14 +32,16 @@ def isSI : Predicate<"Subtarget.getGeneration() "
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
def WAIT_FLAG : InstFlag<"printWaitFlag">;
def isCFDepth0 : Predicate<"isCFDepth0()">;
let Predicates = [isSI] in {
def WAIT_FLAG : InstFlag<"printWaitFlag">;
//===----------------------------------------------------------------------===//
// SMRD Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI, isCFDepth0] in {
let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
@ -76,10 +78,14 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
} // let Predicates = [isSI, isCFDepth0]
//===----------------------------------------------------------------------===//
// SOP1 Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI, isCFDepth0] in {
let neverHasSideEffects = 1 in {
let isMoveImm = 1 in {
@ -152,10 +158,14 @@ def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
} // let Predicates = [isSI, isCFDepth0]
//===----------------------------------------------------------------------===//
// SOP2 Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI, isCFDepth0] in {
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
@ -209,11 +219,6 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
def : Pat <
(i1 (and i1:$src0, i1:$src1)),
(S_AND_B64 $src0, $src1)
>;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
@ -222,17 +227,12 @@ def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
[(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
def : Pat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
[(set i64:$dst, (xor i64:$src0, i64:$src1))]
>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
@ -279,10 +279,14 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
} // let Predicates = [isSI, isCFDepth0]
//===----------------------------------------------------------------------===//
// SOPC Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI, isCFDepth0] in {
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
@ -301,10 +305,14 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
} // let Predicates = [isSI, isCFDepth0]
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI, isCFDepth0] in {
def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
@ -353,10 +361,14 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
//def EXP : EXP_ <0x00000000, "EXP", []>;
} // let Predicates = [isSI, isCFDepth0]
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
let Predicates = [isSI] in {
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
let isTerminator = 1 in {
@ -449,6 +461,10 @@ let Uses = [EXEC] in {
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
} // End hasSideEffects
} // let Predicates = [isSI, isCFDepth0]
let Predicates = [isSI] in {
//===----------------------------------------------------------------------===//
// VOPC Instructions
//===----------------------------------------------------------------------===//
@ -1173,27 +1189,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
@ -1209,14 +1241,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
[(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
[(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
"V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
[(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
[(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
"V_SUBB_U32">;
} // End Uses = [VCC]
@ -1592,6 +1628,45 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
def : Pat <
(i1 (and i1:$src0, i1:$src1)),
(S_AND_B64 $src0, $src1)
>;
def : Pat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
>;
def : Pat <
(i1 (xor i1:$src0, i1:$src1)),
(S_XOR_B64 $src0, $src1)
>;
//===----------------------------------------------------------------------===//
// VOP2 Patterns
//===----------------------------------------------------------------------===//
def : Pat <
(or i64:$src0, i64:$src1),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
(EXTRACT_SUBREG i64:$src1, sub0)), sub0),
(V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
>;
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
(sext_inreg i32:$src0, vt),
(V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
>;
def : SextInReg <i8, 24>;
def : SextInReg <i16, 16>;
/********** ======================= **********/
/********** Image sampling patterns **********/

View File

@ -0,0 +1,27 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
;
;
; Most SALU instructions ignore control flow, so we need to make sure
; they don't overwrite values from other blocks.
; SI-NOT: S_ADD
define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
%0 = icmp eq i32 %a, 0
br i1 %0, label %if, label %else
if:
%1 = add i32 %b, %c
br label %endif
else:
%2 = add i32 %d, %e
br label %endif
endif:
%3 = phi i32 [%1, %if], [%2, %else]
%4 = add i32 %3, %a
store i32 %4, i32 addrspace(1)* %out
ret void
}