diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 6d1c160374e..bd08d2d8ad9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -162,6 +162,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
   case NVPTXISD::StoreParamU32:
     ResNode = SelectStoreParam(N);
     break;
+  case ISD::ADDRSPACECAST:
+    ResNode = SelectAddrSpaceCast(N);
+    break;
   default:
     break;
   }
@@ -191,6 +194,66 @@ static unsigned int getCodeAddrSpace(MemSDNode *N,
   return NVPTX::PTXLdStInstCode::GENERIC;
 }
 
+SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
+  SDValue Src = N->getOperand(0);
+  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
+  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
+  unsigned DstAddrSpace = CastN->getDestAddressSpace();
+
+  assert(SrcAddrSpace != DstAddrSpace &&
+         "addrspacecast must be between different address spaces");
+
+  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
+    // Specific to generic
+    unsigned Opc;
+    switch (SrcAddrSpace) {
+    default: report_fatal_error("Bad address space in addrspacecast");
+    case ADDRESS_SPACE_GLOBAL:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
+                                : NVPTX::cvta_global_yes;
+      break;
+    case ADDRESS_SPACE_SHARED:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
+                                : NVPTX::cvta_shared_yes;
+      break;
+    case ADDRESS_SPACE_CONST:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
+                                : NVPTX::cvta_const_yes;
+      break;
+    case ADDRESS_SPACE_LOCAL:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
+                                : NVPTX::cvta_local_yes;
+      break;
+    }
+    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
+  } else {
+    // Generic to specific
+    if (SrcAddrSpace != 0)
+      report_fatal_error("Cannot cast between two non-generic address spaces");
+    unsigned Opc;
+    switch (DstAddrSpace) {
+    default: report_fatal_error("Bad address space in addrspacecast");
+    case ADDRESS_SPACE_GLOBAL:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
+                                : NVPTX::cvta_to_global_yes;
+      break;
+    case ADDRESS_SPACE_SHARED:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+                                : NVPTX::cvta_to_shared_yes;
+      break;
+    case ADDRESS_SPACE_CONST:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
+                                : NVPTX::cvta_to_const_yes;
+      break;
+    case ADDRESS_SPACE_LOCAL:
+      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
+                                : NVPTX::cvta_to_local_yes;
+      break;
+    }
+    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
+  }
+}
+
 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   SDLoc dl(N);
   LoadSDNode *LD = cast<LoadSDNode>(N);
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index a4a5abeb76c..93ad16911b5 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -67,6 +67,7 @@ private:
   SDNode *SelectLoadParam(SDNode *N);
   SDNode *SelectStoreRetval(SDNode *N);
   SDNode *SelectStoreParam(SDNode *N);
+  SDNode *SelectAddrSpaceCast(SDNode *N);
 
   inline SDValue getI32Imm(unsigned Imm) {
     return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/test/CodeGen/NVPTX/addrspacecast.ll b/test/CodeGen/NVPTX/addrspacecast.ll
new file mode 100644
index 00000000000..98ea655969c
--- /dev/null
+++ b/test/CodeGen/NVPTX/addrspacecast.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefix=PTX64
+
+
+define i32 @conv1(i32 addrspace(1)* %ptr) {
+; PTX32: conv1
+; PTX32: cvta.global.u32
+; PTX32: ld.u32
+; PTX64: conv1
+; PTX64: cvta.global.u64
+; PTX64: ld.u32
+  %genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
+  %val = load i32* %genptr
+  ret i32 %val
+}
+
+define i32 @conv2(i32 addrspace(3)* %ptr) {
+; PTX32: conv2
+; PTX32: cvta.shared.u32
+; PTX32: ld.u32
+; PTX64: conv2
+; PTX64: cvta.shared.u64
+; PTX64: ld.u32
+  %genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
+  %val = load i32* %genptr
+  ret i32 %val
+}
+
+define i32 @conv3(i32 addrspace(4)* %ptr) {
+; PTX32: conv3
+; PTX32: cvta.const.u32
+; PTX32: ld.u32
+; PTX64: conv3
+; PTX64: cvta.const.u64
+; PTX64: ld.u32
+  %genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
+  %val = load i32* %genptr
+  ret i32 %val
+}
+
+define i32 @conv4(i32 addrspace(5)* %ptr) {
+; PTX32: conv4
+; PTX32: cvta.local.u32
+; PTX32: ld.u32
+; PTX64: conv4
+; PTX64: cvta.local.u64
+; PTX64: ld.u32
+  %genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
+  %val = load i32* %genptr
+  ret i32 %val
+}
+
+define i32 @conv5(i32* %ptr) {
+; PTX32: conv5
+; PTX32: cvta.to.global.u32
+; PTX32: ld.global.u32
+; PTX64: conv5
+; PTX64: cvta.to.global.u64
+; PTX64: ld.global.u32
+  %specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
+  %val = load i32 addrspace(1)* %specptr
+  ret i32 %val
+}
+
+define i32 @conv6(i32* %ptr) {
+; PTX32: conv6
+; PTX32: cvta.to.shared.u32
+; PTX32: ld.shared.u32
+; PTX64: conv6
+; PTX64: cvta.to.shared.u64
+; PTX64: ld.shared.u32
+  %specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
+  %val = load i32 addrspace(3)* %specptr
+  ret i32 %val
+}
+
+define i32 @conv7(i32* %ptr) {
+; PTX32: conv7
+; PTX32: cvta.to.const.u32
+; PTX32: ld.const.u32
+; PTX64: conv7
+; PTX64: cvta.to.const.u64
+; PTX64: ld.const.u32
+  %specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
+  %val = load i32 addrspace(4)* %specptr
+  ret i32 %val
+}
+
+define i32 @conv8(i32* %ptr) {
+; PTX32: conv8
+; PTX32: cvta.to.local.u32
+; PTX32: ld.local.u32
+; PTX64: conv8
+; PTX64: cvta.to.local.u64
+; PTX64: ld.local.u32
+  %specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
+  %val = load i32 addrspace(5)* %specptr
+  ret i32 %val
+}