[SystemZ] Use MVC for simple load/store pairs

Look for patterns of the form (store (load ...), ...) in which the two
locations are known not to partially overlap.  (Identical locations are OK.)
These sequences are better implemented by MVC unless either the load or
the store could use RELATIVE LONG instructions.

The new test case showed that we were using LHRL and LGHRL only for
sextloadi16, not for extloadi16.  The patch fixes that too.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185919 91177308-0d34-0410-b5e6-96231b3b80d8
Richard Sandiford  2013-07-09 09:46:39 +00:00
parent f6ea5e0d80   commit 2e015ef9bb
6 changed files with 506 additions and 2 deletions
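
As a concrete illustration of the pattern being targeted (this is essentially
@f14 from the new test added at the end of this commit), a load whose only use
is a store to a provably non-overlapping address should now select to a single
MVC:

define void @f14(i64 *%ptr1) {
; CHECK: f14:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
  %ptr2 = getelementptr i64 *%ptr1, i64 1
  %val = load i64 *%ptr1
  store i64 %val, i64 *%ptr2
  ret void
}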


@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "SystemZTargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -209,6 +210,8 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
  SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                              uint64_t UpperVal, uint64_t LowerVal);

  bool storeLoadCanUseMVC(SDNode *N) const;

public:
  SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel),
@@ -533,6 +536,49 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
  return Or.getNode();
}

// N is a (store (load ...), ...) pattern.  Return true if it can use MVC.
bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
  StoreSDNode *Store = cast<StoreSDNode>(N);
  LoadSDNode *Load = cast<LoadSDNode>(Store->getValue().getNode());

  // MVC is logically a bytewise copy, so can't be used for volatile accesses.
  if (Load->isVolatile() || Store->isVolatile())
    return false;

  // Prefer not to use MVC if either address can use ... RELATIVE LONG
  // instructions.
  assert(Load->getMemoryVT() == Store->getMemoryVT() &&
         "Should already have checked that the types match");
  uint64_t Size = Load->getMemoryVT().getStoreSize();
  if (Size > 1 && Size <= 8) {
    // Prefer LHRL, LRL and LGRL.
    if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
      return false;
    // Prefer STHRL, STRL and STGRL.
    if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
      return false;
  }

  // There's no chance of overlap if the load is invariant.
  if (Load->isInvariant())
    return true;

  // If both operands are aligned, they must be equal or not overlap.
  if (Load->getAlignment() >= Size && Store->getAlignment() >= Size)
    return true;

  // Otherwise we need to check whether there's an alias.
  const Value *V1 = Load->getSrcValue();
  const Value *V2 = Store->getSrcValue();
  if (!V1 || !V2)
    return false;

  int64_t End1 = Load->getSrcValueOffset() + Size;
  int64_t End2 = Store->getSrcValueOffset() + Size;
  return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
                    AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
}

SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
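
Two of the bail-out cases in storeLoadCanUseMVC are worth illustrating (both
are covered by the new test at the end of this commit): volatile accesses
never use MVC, and 2-to-8-byte accesses whose address is a PCREL_WRAPPER are
left to the ... RELATIVE LONG forms.  For example, the i64 global case keeps
LGRL + STG rather than LARL + MVC (this is @f29 from the new test):

@g4 = global i64 4

define void @f29(i64 *%ptr) {
; CHECK: f29:
; CHECK: lgrl [[REG:%r[0-5]]], g4
; CHECK: stg [[REG]], 0(%r2)
; CHECK: br %r14
  %val = load i64 *@g4
  store i64 %val, i64 *%ptr
  ret void
}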


@@ -75,6 +75,10 @@ def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
                                  (EXTRACT_SUBREG FP128:$src2, subreg_high))>;

defm LoadStoreF32  : MVCLoadStore<load, store, f32,  MVCWrapper, 4>;
defm LoadStoreF64  : MVCLoadStore<load, store, f64,  MVCWrapper, 8>;
defm LoadStoreF128 : MVCLoadStore<load, store, f128, MVCWrapper, 16>;

//===----------------------------------------------------------------------===//
// Load instructions
//===----------------------------------------------------------------------===//


@@ -294,6 +294,20 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in
                          [(z_mvc bdaddr12only:$dest, bdaddr12only:$src,
                                  imm32len8:$length)]>;

defm LoadStore8_32  : MVCLoadStore<anyextloadi8, truncstorei8, i32,
                                   MVCWrapper, 1>;
defm LoadStore16_32 : MVCLoadStore<anyextloadi16, truncstorei16, i32,
                                   MVCWrapper, 2>;
defm LoadStore32_32 : MVCLoadStore<load, store, i32, MVCWrapper, 4>;

defm LoadStore8  : MVCLoadStore<anyextloadi8, truncstorei8, i64,
                                MVCWrapper, 1>;
defm LoadStore16 : MVCLoadStore<anyextloadi16, truncstorei16, i64,
                                MVCWrapper, 2>;
defm LoadStore32 : MVCLoadStore<anyextloadi32, truncstorei32, i64,
                                MVCWrapper, 4>;
defm LoadStore64 : MVCLoadStore<load, store, i64, MVCWrapper, 8>;

//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//
@@ -339,6 +353,14 @@ def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;

// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
let AddedComplexity = 7 in {
  def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>;
  def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>;
}

//===----------------------------------------------------------------------===//
// Zero extensions
//===----------------------------------------------------------------------===//
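
These two patterns are what the commit message refers to: when an i16 load
feeds only an i16 store, the load is legalized to an extloadi16, and the
PC-relative case should still pick LHRL (or LGHRL for i64) rather than
materializing the address separately.  @f25 in the new test covers the load
side and @f26 the STHRL store side; the load half looks like this:

@g2 = global i16 2

define void @f25(i16 *%ptr) {
; CHECK: f25:
; CHECK: lhrl [[REG:%r[0-5]]], g2
; CHECK: sth [[REG]], 0(%r2)
; CHECK: br %r14
  %val = load i16 *@g2
  store i16 %val, i16 *%ptr
  ret void
}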


@@ -65,3 +65,17 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
                     (load mode:$src2), cls:$src1),
            (insn cls:$src1, mode:$src2)>;
}

// Use MVC instruction INSN for a load of type LOAD followed by a store
// of type STORE.  VT is the type of the intermediate register and LENGTH
// is the number of bytes to copy (which may be smaller than VT).
multiclass MVCLoadStore<SDPatternOperator load, SDPatternOperator store,
                        ValueType vt, Instruction insn, bits<5> length> {
  def Pat : PatFrag<(ops node:$dest, node:$src),
                    (store (vt (load node:$src)), node:$dest),
                    [{ return storeLoadCanUseMVC(N); }]>;

  def : Pat<(!cast<SDPatternOperator>(NAME##"Pat") bdaddr12only:$dest,
                                                   bdaddr12only:$src),
            (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
}
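
The "length may be smaller than VT" case comes from the sub-word
instantiations above: i8 and i16 are not legal types, so their loads reach
selection as anyextloadi8/anyextloadi16 and their stores as
truncstorei8/truncstorei16, and LoadStore8_32 copies a single byte through an
i32 intermediate register.  @f1 in the new test is the simplest instance of
this:

define void @f1(i8 *%ptr1) {
; CHECK: f1:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
  %ptr2 = getelementptr i8 *%ptr1, i64 1
  %val = load i8 *%ptr1
  store i8 %val, i8 *%ptr2
  ret void
}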


@@ -58,12 +58,13 @@ define double @f5(i64 %a) {
; Test 128-bit moves from GPRs to FPRs.  i128 isn't a legitimate type,
; so this goes through memory.
; FIXME: it would be better to use one MVC here.
define void @f6(fp128 *%a, i128 *%b) {
; CHECK: f6:
; CHECK: lg
; CHECK: mvc
; CHECK: stg
; CHECK: stg
; CHECK: br %r14
  %val = load i128 *%b
  %res = bitcast i128 %val to fp128
  store fp128 %res, fp128 *%a


@@ -0,0 +1,417 @@
; Test load/store pairs that act as memcpys.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g1 = global i8 1
@g2 = global i16 2
@g3 = global i32 3
@g4 = global i64 4
@g5 = external global fp128, align 16
; Test the simple i8 case.
define void @f1(i8 *%ptr1) {
; CHECK: f1:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i8 *%ptr1, i64 1
%val = load i8 *%ptr1
store i8 %val, i8 *%ptr2
ret void
}
; Test i8 cases where the value is zero-extended to 32 bits.
define void @f2(i8 *%ptr1) {
; CHECK: f2:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i8 *%ptr1, i64 1
%val = load i8 *%ptr1
%ext = zext i8 %val to i32
%trunc = trunc i32 %ext to i8
store i8 %trunc, i8 *%ptr2
ret void
}
; Test i8 cases where the value is zero-extended to 64 bits.
define void @f3(i8 *%ptr1) {
; CHECK: f3:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i8 *%ptr1, i64 1
%val = load i8 *%ptr1
%ext = zext i8 %val to i64
%trunc = trunc i64 %ext to i8
store i8 %trunc, i8 *%ptr2
ret void
}
; Test i8 cases where the value is sign-extended to 32 bits.
define void @f4(i8 *%ptr1) {
; CHECK: f4:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i8 *%ptr1, i64 1
%val = load i8 *%ptr1
%ext = sext i8 %val to i32
%trunc = trunc i32 %ext to i8
store i8 %trunc, i8 *%ptr2
ret void
}
; Test i8 cases where the value is sign-extended to 64 bits.
define void @f5(i8 *%ptr1) {
; CHECK: f5:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i8 *%ptr1, i64 1
%val = load i8 *%ptr1
%ext = sext i8 %val to i64
%trunc = trunc i64 %ext to i8
store i8 %trunc, i8 *%ptr2
ret void
}
; Test the simple i16 case.
define void @f6(i16 *%ptr1) {
; CHECK: f6:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i16 *%ptr1, i64 1
%val = load i16 *%ptr1
store i16 %val, i16 *%ptr2
ret void
}
; Test i16 cases where the value is zero-extended to 32 bits.
define void @f7(i16 *%ptr1) {
; CHECK: f7:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i16 *%ptr1, i64 1
%val = load i16 *%ptr1
%ext = zext i16 %val to i32
%trunc = trunc i32 %ext to i16
store i16 %trunc, i16 *%ptr2
ret void
}
; Test i16 cases where the value is zero-extended to 64 bits.
define void @f8(i16 *%ptr1) {
; CHECK: f8:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i16 *%ptr1, i64 1
%val = load i16 *%ptr1
%ext = zext i16 %val to i64
%trunc = trunc i64 %ext to i16
store i16 %trunc, i16 *%ptr2
ret void
}
; Test i16 cases where the value is sign-extended to 32 bits.
define void @f9(i16 *%ptr1) {
; CHECK: f9:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i16 *%ptr1, i64 1
%val = load i16 *%ptr1
%ext = sext i16 %val to i32
%trunc = trunc i32 %ext to i16
store i16 %trunc, i16 *%ptr2
ret void
}
; Test i16 cases where the value is sign-extended to 64 bits.
define void @f10(i16 *%ptr1) {
; CHECK: f10:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i16 *%ptr1, i64 1
%val = load i16 *%ptr1
%ext = sext i16 %val to i64
%trunc = trunc i64 %ext to i16
store i16 %trunc, i16 *%ptr2
ret void
}
; Test the simple i32 case.
define void @f11(i32 *%ptr1) {
; CHECK: f11:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i32 *%ptr1, i64 1
%val = load i32 *%ptr1
store i32 %val, i32 *%ptr2
ret void
}
; Test i32 cases where the value is zero-extended to 64 bits.
define void @f12(i32 *%ptr1) {
; CHECK: f12:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i32 *%ptr1, i64 1
%val = load i32 *%ptr1
%ext = zext i32 %val to i64
%trunc = trunc i64 %ext to i32
store i32 %trunc, i32 *%ptr2
ret void
}
; Test i32 cases where the value is sign-extended to 64 bits.
define void @f13(i32 *%ptr1) {
; CHECK: f13:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i32 *%ptr1, i64 1
%val = load i32 *%ptr1
%ext = sext i32 %val to i64
%trunc = trunc i64 %ext to i32
store i32 %trunc, i32 *%ptr2
ret void
}
; Test the i64 case.
define void @f14(i64 *%ptr1) {
; CHECK: f14:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
%val = load i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
; Test the f32 case.
define void @f15(float *%ptr1) {
; CHECK: f15:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr float *%ptr1, i64 1
%val = load float *%ptr1
store float %val, float *%ptr2
ret void
}
; Test the f64 case.
define void @f16(double *%ptr1) {
; CHECK: f16:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr double *%ptr1, i64 1
%val = load double *%ptr1
store double %val, double *%ptr2
ret void
}
; Test the f128 case.
define void @f17(fp128 *%ptr1) {
; CHECK: f17:
; CHECK: mvc 16(16,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr fp128 *%ptr1, i64 1
%val = load fp128 *%ptr1
store fp128 %val, fp128 *%ptr2
ret void
}
; Make sure that we don't use MVC if the load is volatile.
define void @f18(i64 *%ptr1) {
; CHECK: f18:
; CHECK-NOT: mvc
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
%val = load volatile i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
; ...likewise the store.
define void @f19(i64 *%ptr1) {
; CHECK: f19:
; CHECK-NOT: mvc
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
%val = load i64 *%ptr1
store volatile i64 %val, i64 *%ptr2
ret void
}
; Test that MVC is used for aligned loads and stores, even if there is
; no way of telling whether they alias.
define void @f20(i64 *%ptr1, i64 *%ptr2) {
; CHECK: f20:
; CHECK: mvc 0(8,%r3), 0(%r2)
; CHECK: br %r14
%val = load i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
; ...but if the loads aren't aligned, we can't be sure.
define void @f21(i64 *%ptr1, i64 *%ptr2) {
; CHECK: f21:
; CHECK-NOT: mvc
; CHECK: br %r14
%val = load i64 *%ptr1, align 2
store i64 %val, i64 *%ptr2, align 2
ret void
}
; Test a case where there is definite overlap.
define void @f22(i64 %base) {
; CHECK: f22:
; CHECK-NOT: mvc
; CHECK: br %r14
%add = add i64 %base, 1
%ptr1 = inttoptr i64 %base to i64 *
%ptr2 = inttoptr i64 %add to i64 *
%val = load i64 *%ptr1, align 1
store i64 %val, i64 *%ptr2, align 1
ret void
}
; Test that we can use MVC for global addresses for i8.
define void @f23(i8 *%ptr) {
; CHECK: f23:
; CHECK: larl [[REG:%r[0-5]]], g1
; CHECK: mvc 0(1,%r2), 0([[REG]])
; CHECK: br %r14
%val = load i8 *@g1
store i8 %val, i8 *%ptr
ret void
}
; ...and again with the global on the store.
define void @f24(i8 *%ptr) {
; CHECK: f24:
; CHECK: larl [[REG:%r[0-5]]], g1
; CHECK: mvc 0(1,[[REG]]), 0(%r2)
; CHECK: br %r14
%val = load i8 *%ptr
store i8 %val, i8 *@g1
ret void
}
; Test that we use LHRL for i16.
define void @f25(i16 *%ptr) {
; CHECK: f25:
; CHECK: lhrl [[REG:%r[0-5]]], g2
; CHECK: sth [[REG]], 0(%r2)
; CHECK: br %r14
%val = load i16 *@g2
store i16 %val, i16 *%ptr
ret void
}
; ...likewise STHRL.
define void @f26(i16 *%ptr) {
; CHECK: f26:
; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
; CHECK: sthrl [[REG]], g2
; CHECK: br %r14
%val = load i16 *%ptr
store i16 %val, i16 *@g2
ret void
}
; Test that we use LRL for i32.
define void @f27(i32 *%ptr) {
; CHECK: f27:
; CHECK: lrl [[REG:%r[0-5]]], g3
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%val = load i32 *@g3
store i32 %val, i32 *%ptr
ret void
}
; ...likewise STRL.
define void @f28(i32 *%ptr) {
; CHECK: f28:
; CHECK: l [[REG:%r[0-5]]], 0(%r2)
; CHECK: strl [[REG]], g3
; CHECK: br %r14
%val = load i32 *%ptr
store i32 %val, i32 *@g3
ret void
}
; Test that we use LGRL for i64.
define void @f29(i64 *%ptr) {
; CHECK: f29:
; CHECK: lgrl [[REG:%r[0-5]]], g4
; CHECK: stg [[REG]], 0(%r2)
; CHECK: br %r14
%val = load i64 *@g4
store i64 %val, i64 *%ptr
ret void
}
; ...likewise STGRL.
define void @f30(i64 *%ptr) {
; CHECK: f30:
; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
; CHECK: stgrl [[REG]], g4
; CHECK: br %r14
%val = load i64 *%ptr
store i64 %val, i64 *@g4
ret void
}
; Test that we can use MVC for global addresses for fp128.
define void @f31(fp128 *%ptr) {
; CHECK: f31:
; CHECK: larl [[REG:%r[0-5]]], g5
; CHECK: mvc 0(16,%r2), 0([[REG]])
; CHECK: br %r14
%val = load fp128 *@g5, align 16
store fp128 %val, fp128 *%ptr, align 16
ret void
}
; ...and again with the global on the store.
define void @f32(fp128 *%ptr) {
; CHECK: f32:
; CHECK: larl [[REG:%r[0-5]]], g5
; CHECK: mvc 0(16,[[REG]]), 0(%r2)
; CHECK: br %r14
%val = load fp128 *%ptr, align 16
store fp128 %val, fp128 *@g5, align 16
ret void
}
; Test a case where offset disambiguation is enough.
define void @f33(i64 *%ptr1) {
; CHECK: f33:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
%val = load i64 *%ptr1, align 1
store i64 %val, i64 *%ptr2, align 1
ret void
}
; Test f21 in cases where TBAA tells us there is no alias.
define void @f34(i64 *%ptr1, i64 *%ptr2) {
; CHECK: f34:
; CHECK: mvc 0(8,%r3), 0(%r2)
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1
store i64 %val, i64 *%ptr2, align 2, !tbaa !2
ret void
}
; Test f21 in cases where TBAA is present but doesn't help.
define void @f35(i64 *%ptr1, i64 *%ptr2) {
; CHECK: f35:
; CHECK-NOT: mvc
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1
store i64 %val, i64 *%ptr2, align 2, !tbaa !1
ret void
}
!0 = metadata !{ metadata !"root" }
!1 = metadata !{ metadata !"set1", metadata !0 }
!2 = metadata !{ metadata !"set2", metadata !0 }