diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index ca981c38023..ba705db69de 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -16,9 +16,11 @@ #include "AMDGPURegisterInfo.h" #include "R600InstrInfo.h" #include "SIISelLowering.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Function.h" using namespace llvm; @@ -70,6 +72,11 @@ private: bool isLocalLoad(const LoadSDNode *N) const; bool isRegionLoad(const LoadSDNode *N) const; + /// \returns True if the current basic block being selected is at control + /// flow depth 0. Meaning that the current block dominates the + // exit block. + bool isCFDepth0() const; + const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, @@ -565,6 +572,14 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { return false; } +bool AMDGPUDAGToDAGISel::isCFDepth0() const { + // FIXME: Figure out a way to use DominatorTree analysis here. + const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock(); + const Function *Fn = FuncInfo->Fn; + return &Fn->front() == CurBlock || &Fn->back() == CurBlock; +} + + const char *AMDGPUDAGToDAGISel::getPassName() const { return "AMDGPU DAG->DAG Pattern Instruction Selection"; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 80897f2f416..a8aefc22871 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -32,14 +32,16 @@ def isSI : Predicate<"Subtarget.getGeneration() " def isCI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SEA_ISLANDS">; -def WAIT_FLAG : InstFlag<"printWaitFlag">; +def isCFDepth0 : Predicate<"isCFDepth0()">; -let Predicates = [isSI] in { +def WAIT_FLAG : InstFlag<"printWaitFlag">; //===----------------------------------------------------------------------===// // SMRD Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI, isCFDepth0] in { + let mayLoad = 1 in { // We are using the SGPR_32 and not the SReg_32 register class for 32-bit @@ -76,10 +78,14 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; +} // let Predicates = [isSI, isCFDepth0] + //===----------------------------------------------------------------------===// // SOP1 Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI, isCFDepth0] in { + let neverHasSideEffects = 1 in { let isMoveImm = 1 in { @@ -152,10 +158,14 @@ def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; +} // let Predicates = [isSI, isCFDepth0] + //===----------------------------------------------------------------------===// // SOP2 Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI, isCFDepth0] in { + let Defs = [SCC] in { // Carry out goes to SCC let isCommutable = 1 in { def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; @@ -209,11 +219,6 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", [(set i64:$dst, (and i64:$src0, i64:$src1))] >; -def : Pat < - (i1 (and i1:$src0, i1:$src1)), - (S_AND_B64 $src0, $src1) ->; - def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", [(set i32:$dst, (or i32:$src0, i32:$src1))] >; @@ -222,17 +227,12 @@ def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", [(set i64:$dst, (or i64:$src0, i64:$src1))] >; -def : Pat < - (i1 (or i1:$src0, i1:$src1)), - (S_OR_B64 $src0, $src1) ->; - def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", [(set i32:$dst, (xor i32:$src0, i32:$src1))] >; def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", - [(set i1:$dst, (xor i1:$src0, i1:$src1))] + [(set i64:$dst, (xor i64:$src0, i64:$src1))] >; def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; @@ -279,10 +279,14 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; +} // let Predicates = [isSI, isCFDepth0] + //===----------------------------------------------------------------------===// // SOPC Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI, isCFDepth0] in { + def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">; def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">; def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">; @@ -301,10 +305,14 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">; ////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; //def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; +} // let Predicates = [isSI, isCFDepth0] + //===----------------------------------------------------------------------===// // SOPK Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI, isCFDepth0] in { + def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; @@ -353,10 +361,14 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; //def EXP : EXP_ <0x00000000, "EXP", []>; +} // let Predicates = [isSI, isCFDepth0] + //===----------------------------------------------------------------------===// // SOPP Instructions //===----------------------------------------------------------------------===// +let Predicates = [isSI] in { + //def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; let isTerminator = 1 in { @@ -449,6 +461,10 @@ let Uses = [EXEC] in { //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; } // End hasSideEffects +} // let Predicates = [isSI, isCFDepth0] + +let Predicates = [isSI] in { + //===----------------------------------------------------------------------===// // VOPC Instructions //===----------------------------------------------------------------------===// @@ -1173,27 +1189,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>; + +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", + [(set i32:$dst, (srl i32:$src0, i32:$src1))] +>; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", + [(set i32:$dst, (sra i32:$src0, i32:$src1))] +>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set i32:$dst, (shl i32:$src0, i32:$src1))] +>; } defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; -defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>; -defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>; -defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>; +defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", + [(set i32:$dst, (and i32:$src0, i32:$src1))]>; +defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", + [(set i32:$dst, (or i32:$src0, i32:$src1))] +>; +defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", + [(set i32:$dst, (xor i32:$src0, i32:$src1))] +>; } // End isCommutable = 1 @@ -1209,14 +1241,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. -defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>; -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>; +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", + [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", + [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32, "V_SUB_I32">; let Uses = [VCC] in { // Carry-in comes from VCC -defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>; -defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>; +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", + [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", + [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>; defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32, "V_SUBB_U32">; } // End Uses = [VCC] @@ -1592,6 +1628,45 @@ def : Pat < (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; +//===----------------------------------------------------------------------===// +// SOP2 Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (i1 (and i1:$src0, i1:$src1)), + (S_AND_B64 $src0, $src1) +>; + +def : Pat < + (i1 (or i1:$src0, i1:$src1)), + (S_OR_B64 $src0, $src1) +>; + +def : Pat < + (i1 (xor i1:$src0, i1:$src1)), + (S_XOR_B64 $src0, $src1) +>; + +//===----------------------------------------------------------------------===// +// VOP2 Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (or i64:$src0, i64:$src1), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0), + (EXTRACT_SUBREG i64:$src1, sub0)), sub0), + (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1), + (EXTRACT_SUBREG i64:$src1, sub1)), sub1) +>; + +class SextInReg : Pat < + (sext_inreg i32:$src0, vt), + (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0)) +>; + +def : SextInReg ; +def : SextInReg ; /********** ======================= **********/ /********** Image sampling patterns **********/ diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll new file mode 100644 index 00000000000..06ad24d959c --- /dev/null +++ b/test/CodeGen/R600/sgpr-control-flow.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; +; +; Most SALU instructions ignore control flow, so we need to make sure +; they don't overwrite values from other blocks. + +; SI-NOT: S_ADD + +define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +entry: + %0 = icmp eq i32 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = add i32 %b, %c + br label %endif + +else: + %2 = add i32 %d, %e + br label %endif + +endif: + %3 = phi i32 [%1, %if], [%2, %else] + %4 = add i32 %3, %a + store i32 %4, i32 addrspace(1)* %out + ret void +}