[SystemZ] Use MVC for memcpy

Use MVC for memcpy in cases where a single MVC is enough.  Using MVC is
a win for longer copies too, but I'll leave that for later.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185802 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-07-08 09:35:23 +00:00
parent 12b701beea
commit dff0009d0c
10 changed files with 227 additions and 2 deletions

View File

@ -22,6 +22,7 @@ add_llvm_target(SystemZCodeGen
SystemZLongBranch.cpp
SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp
SystemZSelectionDAGInfo.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
)

View File

@ -241,6 +241,12 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
MaxStoresPerMemmove = 0;
MaxStoresPerMemmoveOptSize = 0;
}
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
@ -1579,6 +1585,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SDIVREM64);
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
OPCODE(MVC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@ -2143,6 +2150,26 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
return MBB;
}
// Replace an MVCWrapper pseudo instruction with a real MVC.
//
// The pseudo's operands are read in this order: destination base,
// destination displacement, source base, source displacement, length.
// MVC is built with the length placed between the destination address
// and the source address.  The new instruction is inserted before MI,
// MI is then erased, and the (unchanged) block MBB is returned.
MachineBasicBlock *
SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
                                      MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  // Immediate fields of the pseudo.
  uint64_t NumBytes = MI->getOperand(4).getImm();
  uint64_t ToDisp = MI->getOperand(1).getImm();
  uint64_t FromDisp = MI->getOperand(3).getImm();

  // Base operands are copied out before MI is erased below.
  MachineOperand ToBase = MI->getOperand(0);
  MachineOperand FromBase = MI->getOperand(2);

  BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC))
      .addOperand(ToBase)
      .addImm(ToDisp)
      .addImm(NumBytes)
      .addOperand(FromBase)
      .addImm(FromDisp);

  MI->eraseFromParent();
  return MBB;
}
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@ -2376,6 +2403,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
MI->getOperand(1).getMBB()))
MI->eraseFromParent();
return MBB;
case SystemZ::MVCWrapper:
return emitMVCWrapper(MI, MBB);
default:
llvm_unreachable("Unexpected instr type to insert");
}

View File

@ -73,6 +73,13 @@ namespace SystemZISD {
UDIVREM32,
UDIVREM64,
// Use MVC to copy bytes from one memory location to another.
// The first operand is the target address, the second operand is the
// source address, and the third operand is the constant length.
// This isn't a memory opcode because we'd need to attach two
// MachineMemOperands rather than one.
MVC,
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@ -221,6 +228,8 @@ private:
unsigned BitSize) const;
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *emitMVCWrapper(MachineInstr *MI,
MachineBasicBlock *BB) const;
};
} // end namespace llvm

View File

@ -288,6 +288,12 @@ let mayLoad = 1, mayStore = 1 in
bdaddr12only:$BD2),
"mvc\t$BDL1, $BD2", []>;
// Pseudo instruction that the z_mvc node is selected to.  It takes a
// destination address, a source address and an immediate length.  Because
// usesCustomInserter is set, it is expanded after selection; the custom
// inserter for SystemZ::MVCWrapper (emitMVCWrapper) builds the real MVC.
let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in
def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm32len8:$length),
[(z_mvc bdaddr12only:$dest, bdaddr12only:$src,
imm32len8:$length)]>;
//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//

View File

@ -219,6 +219,11 @@ def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
// i32 immediates
//===----------------------------------------------------------------------===//
// Immediates for 8-bit lengths.  The predicate tests (length - 1) rather
// than the length itself, so lengths 1 through 256 are accepted.
// NOTE(review): a length of zero is presumably rejected because the
// subtraction wraps around in unsigned arithmetic -- this assumes
// getZExtValue() returns an unsigned 64-bit value; confirm.
def imm32len8 : Immediate<i32, [{
return isUInt<8>(N->getZExtValue() - 1);
}], NOOP_SDNodeXForm, "U32Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
def imm32ll16 : Immediate<i32, [{

View File

@ -52,6 +52,10 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>,
SDTCisVT<6, i32>]>;
// Type profile for memory-to-memory copy nodes: no results and three
// operands.  Operands 0 and 1 are pointers and operand 2 is an i32.
// It is used by z_mvc, whose operands are documented as the target
// address, the source address and the constant length.
def SDT_ZCopy : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@ -103,6 +107,9 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
// DAG node for SystemZISD::MVC, using the SDT_ZCopy type profile.  It is
// chained and flagged as both loading from and storing to memory.
def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,46 @@
//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "systemz-selectiondag-info"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Construct the SystemZ SelectionDAG info object.  The target machine is
// simply forwarded to the TargetSelectionDAGInfo base class; there is no
// SystemZ-specific state to initialize.
SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(
    const SystemZTargetMachine &TM)
    : TargetSelectionDAGInfo(TM) {}
// The destructor body is empty; this class adds nothing that needs cleanup.
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {}
// Emit SystemZ-specific code for a memcpy when a single MVC can do the job.
//
// A SystemZISD::MVC node is created only when all of the following hold:
//   - the copy is not volatile;
//   - Size is a compile-time constant;
//   - the constant is between 1 and 0x100 (256) bytes inclusive.
// In that case the new node (of type MVT::Other, i.e. the output chain) is
// returned.  In every other case an empty SDValue is returned.
// NOTE(review): an empty SDValue presumably tells the caller to fall back
// to its default memcpy lowering -- confirm against the base-class contract.
//
// Align, AlwaysInline, DstPtrInfo and SrcPtrInfo are not consulted here.
SDValue SystemZSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
                        bool IsVolatile, bool AlwaysInline,
                        MachinePointerInfo DstPtrInfo,
                        MachinePointerInfo SrcPtrInfo) const {
  // Volatile copies are never handled here.
  if (IsVolatile)
    return SDValue();

  // Only constant lengths are handled.
  ConstantSDNode *ConstLen = dyn_cast<ConstantSDNode>(Size);
  if (!ConstLen)
    return SDValue();

  // Reject empty copies and anything longer than 256 bytes.
  uint64_t NumBytes = ConstLen->getZExtValue();
  if (NumBytes < 1 || NumBytes > 0x100)
    return SDValue();

  // A single MVC.
  return DAG.getNode(SystemZISD::MVC, DL, MVT::Other,
                     Chain, Dst, Src, Size);
}

View File

@ -0,0 +1,40 @@
//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//
#ifndef SYSTEMZSELECTIONDAGINFO_H
#define SYSTEMZSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
class SystemZTargetMachine;
// SystemZ subclass of TargetSelectionDAGInfo.  The only base-class hook it
// overrides is EmitTargetCodeForMemcpy.
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
// Constructed from the owning SystemZ target machine.
explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
~SystemZSelectionDAGInfo();
// Hook for emitting target-specific code for a memcpy of Size bytes from
// Src to Dst.  See the definition for which cases are handled.
virtual
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const
LLVM_OVERRIDE;
};
}
#endif

View File

@ -20,10 +20,10 @@
#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
#include "SystemZSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
const DataLayout DL;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
TargetSelectionDAGInfo TSInfo;
SystemZSelectionDAGInfo TSInfo;
SystemZFrameLowering FrameLowering;
public:

View File

@ -0,0 +1,82 @@
; Test memcpy using MVC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind
; Zero-length copy with an i32 length.  The output between the function
; label and the return must not mention %r2 or %r3 (the registers the mvc
; tests in this file use for %dest and %src), i.e. no copy code is expected.
define void @f1(i8 *%dest, i8 *%src) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1,
i1 false)
ret void
}
; Zero-length copy with an i64 length.  As with the i32 form, neither %r2
; nor %r3 may appear before the return.
define void @f2(i8 *%dest, i8 *%src) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1,
i1 false)
ret void
}
; One-byte copy with an i32 length: the smallest copy that should become a
; single mvc.
define void @f3(i8 *%dest, i8 *%src) {
; CHECK: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1,
i1 false)
ret void
}
; One-byte copy with an i64 length: expects the same single mvc as the i32
; form.
define void @f4(i8 *%dest, i8 *%src) {
; CHECK: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1,
i1 false)
ret void
}
; 256-byte copy with an i32 length: the largest size expected to fit in a
; single mvc.
define void @f5(i8 *%dest, i8 *%src) {
; CHECK: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1,
i1 false)
ret void
}
; 256-byte copy with an i64 length: expects the same single mvc as the i32
; form.
define void @f6(i8 *%dest, i8 *%src) {
; CHECK: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1,
i1 false)
ret void
}
; 257 bytes is too big for a single MVC.  For now expect none, so that
; the test fails and gets updated when large copies are implemented.
; This function covers the i32-length form of the intrinsic.
define void @f7(i8 *%dest, i8 *%src) {
; CHECK: f7:
; CHECK-NOT: mvc
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
i1 false)
ret void
}
; 257-byte copy with an i64 length: one byte more than the largest
; single-mvc test above, so no mvc is expected before the return.
define void @f8(i8 *%dest, i8 *%src) {
; CHECK: f8:
; CHECK-NOT: mvc
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 257, i32 1,
i1 false)
ret void
}