diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 968ca6c5413..36296f52e68 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -438,6 +438,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     break;
   }
 
+  case ISD::STORE: {
+    // Handle i64 stores here for the same reason mentioned above for loads.
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    SDValue Value = ST->getValue();
+    if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+      break;
+
+    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+                                       MVT::v2i32, Value);
+    SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+                                        ST->getBasePtr(), ST->getMemOperand());
+
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+    if (NewValue.getOpcode() == ISD::BITCAST) {
+      Select(NewStore.getNode());
+      return SelectCode(NewValue.getNode());
+    }
+
+    // getNode() may fold the bitcast if its input was another bitcast. If that
+    // happens we should only select the new store.
+    N = NewStore.getNode();
+    break;
+  }
+
   case AMDGPUISD::REGISTER_LOAD: {
     if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
       break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index d3aac9707ef..507e399b878 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
 
-  setOperationAction(ISD::STORE, MVT::i64, Promote);
-  AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
-
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 1187ff246f6..54ac608e190 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -35,7 +35,8 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write2_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
   %v = load i64 addrspace(3)* %p, align 1
@@ -52,7 +53,8 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
-; SI: buffer_store_dwordx2
+; SI: buffer_store_dword
+; SI: buffer_store_dword
 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
   %v = load i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
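
For reference, a minimal sketch (not part of the patch) of the kind of IR that now reaches the new ISD::STORE case in Select(): a non-truncating i64 store whose value is bitcast to v2i32 at instruction selection rather than being promoted during DAG legalization. The function name and the expected-output comment below are hypothetical, assuming natural alignment still permits the combined dwordx2 form on SI:

; Hypothetical check: an aligned i64 global store should still select to a
; single buffer_store_dwordx2, via the v2i32 bitcast inserted in
; AMDGPUDAGToDAGISel::Select().
define void @aligned_store_i64_global(i64 addrspace(1)* %out, i64 %val) {
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}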