diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index babd0e2eb1e..9629b70b470 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -45,6 +45,8 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { + Subtarget = &TM.getSubtarget<AMDGPUSubtarget>(); + // Initialize target lowering borrowed from AMDIL InitAMDILLowering(); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index b53ba0a542f..7fa25905d01 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -21,9 +21,13 @@ namespace llvm { class AMDGPUMachineFunction; +class AMDGPUSubtarget; class MachineRegisterInfo; class AMDGPUTargetLowering : public TargetLowering { +protected: + const AMDGPUSubtarget *Subtarget; + private: void ExtractVectorElements(SDValue Op, SelectionDAG &DAG, SmallVectorImpl &Args, diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1a49ccb2268..004957ce189 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -14,6 +14,7 @@ #include "SIISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -30,7 +31,6 @@ using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM) { - addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); @@ -175,8 +175,26 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : } } - setTargetDAGCombine(ISD::SELECT_CC); + // Expand f64 rounding by default; overridden below where instructions exist. + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + // Likewise for each f64 vector type from v1f64 through v8f64. + for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) { + MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + } + // Sea Islands adds V_TRUNC_F64, V_CEIL_F64 and V_FLOOR_F64. + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + } + + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setSchedulingPreference(Sched::RegPressure);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9da05c34d7b..b45da5cb9ee 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -29,6 +29,9 @@ def SendMsgImm : Operand { def isSI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; +def isCI : Predicate<"Subtarget.getGeneration() " + ">= AMDGPUSubtarget::SEA_ISLANDS">; + def WAIT_FLAG : InstFlag<"printWaitFlag">; let Predicates = [isSI] in { @@ -2104,6 +2107,55 @@ def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; +let Predicates = [isCI] in { + +// Sea Islands new arithmetic instructions +let neverHasSideEffects = 1 in { +defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64", + [(set f64:$dst, (ftrunc f64:$src0))] +>; +defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64", + [(set f64:$dst, (fceil f64:$src0))] +>; +defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64", + [(set f64:$dst, (ffloor f64:$src0))] +>; + +defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>; + +def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>; +def V_MQSAD_U16_U8 : VOP3_32 <0x00000172, "V_MQSAD_U16_U8", []>; +def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>; +def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>; + +// XXX - Does this set VCC?
+def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>; +} // End neverHasSideEffects = 1 + +// Remaining instructions: +// FLAT_* +// S_CBRANCH_CDBGUSER +// S_CBRANCH_CDBGSYS +// S_CBRANCH_CDBGSYS_OR_USER +// S_CBRANCH_CDBGSYS_AND_USER +// S_DCACHE_INV_VOL +// V_EXP_LEGACY_F32 +// V_LOG_LEGACY_F32 +// DS_NOP +// DS_GWS_SEMA_RELEASE_ALL +// DS_WRAP_RTN_B32 +// DS_CONDXCHG32_RTN_B64 +// DS_WRITE_B96 +// DS_WRITE_B128 +// DS_CONDXCHG32_RTN_B128 +// DS_READ_B96 +// DS_READ_B128 +// BUFFER_LOAD_DWORDX3 +// BUFFER_STORE_DWORDX3 + +} // End Predicates = [isCI] + + /********** ====================== **********/ /********** Indirect adressing **********/ /********** ====================== **********/ diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll new file mode 100644 index 00000000000..b8b945f46ff --- /dev/null +++ b/test/CodeGen/R600/fceil.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.ceil.f64(double) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +define void @fceil_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.ceil.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; CI-LABEL: @fceil_v2f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-CI-LABEL: @fceil_v3f64: +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: 
V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; CI-LABEL: @fceil_v4f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @fceil_v8f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @fceil_v16f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll new file mode 100644 index 00000000000..51d2b896150 --- /dev/null +++ b/test/CodeGen/R600/ffloor.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.floor.f64(double) nounwind readnone +declare 
<2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @ffloor_f64: +; CI: V_FLOOR_F64_e32 +define void @ffloor_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.floor.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; CI-LABEL: @ffloor_v2f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-CI-LABEL: @ffloor_v3f64: +; FIXME-CI: V_FLOOR_F64_e32 +; FIXME-CI: V_FLOOR_F64_e32 +; FIXME-CI: V_FLOOR_F64_e32 +; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; CI-LABEL: @ffloor_v4f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @ffloor_v8f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> 
%y, <8 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @ffloor_v16f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll new file mode 100644 index 00000000000..6b235ffbd98 --- /dev/null +++ b/test/CodeGen/R600/ftrunc.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.trunc.f64(double) nounwind readnone +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @ftrunc_f64: +; CI: V_TRUNC_F64_e32 +define void @ftrunc_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.trunc.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; CI-LABEL: @ftrunc_v2f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-CI-LABEL: @ftrunc_v3f64: +; FIXME-CI: V_TRUNC_F64_e32 +; FIXME-CI: 
V_TRUNC_F64_e32 +; FIXME-CI: V_TRUNC_F64_e32 +; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; CI-LABEL: @ftrunc_v4f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @ftrunc_v8f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; CI-LABEL: @ftrunc_v16f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +}