R600/SI - Add new CI arithmetic instructions.

Does not yet include larger part required
to match v_mad_i64_i32 / v_mad_u64_u32.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202077 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-02-24 21:01:28 +00:00
parent 3f9f4bc0af
commit bc247e4afd
7 changed files with 324 additions and 2 deletions

View File

@ -45,6 +45,8 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
// Initialize target lowering borrowed from AMDIL
InitAMDILLowering();

View File

@ -21,9 +21,13 @@
namespace llvm {
class AMDGPUMachineFunction;
class AMDGPUSubtarget;
class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
protected:
const AMDGPUSubtarget *Subtarget;
private:
void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Args,

View File

@ -14,6 +14,7 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@ -30,7 +31,6 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
@ -175,8 +175,20 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}
}
setTargetDAGCombine(ISD::SELECT_CC);
for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
}
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
}
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setSchedulingPreference(Sched::RegPressure);

View File

@ -29,6 +29,9 @@ def SendMsgImm : Operand<i32> {
def isSI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
def WAIT_FLAG : InstFlag<"printWaitFlag">;
let Predicates = [isSI] in {
@ -2104,6 +2107,55 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
let Predicates = [isCI] in {
// Sea island new arithmetic instructinos
let neverHasSideEffects = 1 in {
defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
[(set f64:$dst, (ftrunc f64:$src0))]
>;
defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
[(set f64:$dst, (fceil f64:$src0))]
>;
defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
[(set f64:$dst, (ffloor f64:$src0))]
>;
defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>;
def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
def V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
// XXX - Does this set VCC?
def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
} // End neverHasSideEffects = 1
// Remaining instructions:
// FLAT_*
// S_CBRANCH_CDBGUSER
// S_CBRANCH_CDBGSYS
// S_CBRANCH_CDBGSYS_OR_USER
// S_CBRANCH_CDBGSYS_AND_USER
// S_DCACHE_INV_VOL
// V_EXP_LEGACY_F32
// V_LOG_LEGACY_F32
// DS_NOP
// DS_GWS_SEMA_RELEASE_ALL
// DS_WRAP_RTN_B32
// DS_CNDXCHG32_RTN_B64
// DS_WRITE_B96
// DS_WRITE_B128
// DS_CONDXCHG32_RTN_B128
// DS_READ_B96
// DS_READ_B128
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
} // End Predicates = [isCI]
/********** ====================== **********/
/********** Indirect adressing **********/
/********** ====================== **********/

View File

@ -0,0 +1,84 @@
; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
declare double @llvm.ceil.f64(double) nounwind readnone
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; CI-LABEL: @fceil_f64:
; CI: V_CEIL_F64_e32
define void @fceil_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
ret void
}
; CI-LABEL: @fceil_v2f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
ret void
}
; FIXME-CI-LABEL: @fceil_v3f64:
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; ret void
; }
; CI-LABEL: @fceil_v4f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @fceil_v8f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @fceil_v16f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,84 @@
; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
; CI-LABEL: @ffloor_f64:
; CI: V_FLOOR_F64_e32
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.floor.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
ret void
}
; CI-LABEL: @ffloor_v2f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
ret void
}
; FIXME-CI-LABEL: @ffloor_v3f64:
; FIXME-CI: V_FLOOR_F64_e32
; FIXME-CI: V_FLOOR_F64_e32
; FIXME-CI: V_FLOOR_F64_e32
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; ret void
; }
; CI-LABEL: @ffloor_v4f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @ffloor_v8f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @ffloor_v16f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,84 @@
; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
declare double @llvm.trunc.f64(double) nounwind readnone
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
; CI-LABEL: @ftrunc_f64:
; CI: V_TRUNC_F64_e32
define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
ret void
}
; CI-LABEL: @ftrunc_v2f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
ret void
}
; FIXME-CI-LABEL: @ftrunc_v3f64:
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; ret void
; }
; CI-LABEL: @ftrunc_v4f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @ftrunc_v8f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
ret void
}
; CI-LABEL: @ftrunc_v16f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
ret void
}