[SystemZ] Use MVC to spill loads and stores
Try to use MVC when spilling the destination of a simple load or the source of a simple store. As explained in the comment, this doesn't yet handle the case where the load or store location is also a frame index, since that could lead to two simultaneous scavenger spills, something the backend can't handle yet. spill-02.py tests that this restriction kicks in, but unfortunately I've not yet found a case that would fail without it. The volatile trick I used for other scavenger tests doesn't work here because we can't use MVC for volatile accesses anyway.

I'm planning on relaxing the restriction later, hopefully with a test that does trigger the problem...

Tests @f8 and @f9 also showed that L(G)RL and ST(G)RL were wrongly classified as SimpleBDX{Load,Store}. It wouldn't be easy to test for that bug separately, which is why I didn't split out the fix as a separate patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185434 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 9188443a2d
commit 1ce4894a3f
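For illustration, the effect on the generated code is roughly as follows; the register numbers and offsets here are only representative, and the exact patterns are what the new spill-01.ll test checks (e.g. in @f2). Instead of spilling the result of a load through a GPR and refilling it the same way:

    l    %r0, 0(%r2)         # original load
    st   %r0, 164(%r15)      # store to the spill slot
    ...                      # call clobbers %r0
    l    %r0, 164(%r15)      # reload from the spill slot
    st   %r0, 0(%r2)         # original store

the spill and the refill each become a single memory-to-memory copy:

    mvc  164(4,%r15), 0(%r2)   # spill the loaded value directly
    ...
    mvc  0(4,%r2), 164(%r15)   # refill the stored value directly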
lib/Target/SystemZ/SystemZInstrInfo.cpp

@@ -13,6 +13,7 @@

#include "SystemZInstrInfo.h"
#include "SystemZInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"

#define GET_INSTRINFO_CTOR

@@ -80,7 +81,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
// Return 0 otherwise.
//
// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
static int isSimpleMove(const MachineInstr *MI, int &FrameIndex,
                        unsigned Flag) {
  const MCInstrDesc &MCID = MI->getDesc();
  if ((MCID.TSFlags & Flag) &&
      MI->getOperand(1).isFI() &&

@@ -315,6 +317,96 @@ SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                    FrameIdx);
}

// Return true if MI is a simple load or store with a 12-bit displacement
// and no index.  Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
  const MCInstrDesc &MCID = MI->getDesc();
  return ((MCID.TSFlags & Flag) &&
          isUInt<12>(MI->getOperand(2).getImm()) &&
          MI->getOperand(3).getReg() == 0);
}

// Return a MachineMemOperand for FrameIndex with flags MMOFlags.
// Offset is the byte offset from the start of FrameIndex.
static MachineMemOperand *getFrameMMO(MachineFunction &MF, int FrameIndex,
                                      uint64_t &Offset, unsigned MMOFlags) {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Value *V = PseudoSourceValue::getFixedStack(FrameIndex);
  return MF.getMachineMemOperand(MachinePointerInfo(V, Offset), MMOFlags,
                                 MFI->getObjectSize(FrameIndex),
                                 MFI->getObjectAlignment(FrameIndex));
}

MachineInstr *
SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                        MachineInstr *MI,
                                        const SmallVectorImpl<unsigned> &Ops,
                                        int FrameIndex) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);

  // Early exit for cases we don't care about
  if (Ops.size() != 1)
    return 0;

  unsigned OpNum = Ops[0];
  unsigned Reg = MI->getOperand(OpNum).getReg();
  unsigned RegSize = MF.getRegInfo().getRegClass(Reg)->getSize();
  assert(Size == RegSize && "Invalid size combination");

  // Look for cases where the source of a simple store or the destination
  // of a simple load is being spilled.  Try to use MVC instead.
  //
  // Although MVC is in practice a fast choice in these cases, it is still
  // logically a bytewise copy.  This means that we cannot use it if the
  // load or store is volatile.  It also means that the transformation is
  // not valid in cases where the two memories partially overlap; however,
  // that is not a problem here, because we know that one of the memories
  // is a full frame index.
  //
  // For now we punt if the load or store is also to a frame index.
  // In that case we might end up eliminating both of them to out-of-range
  // offsets, which might then force the register scavenger to spill two
  // other registers.  The backend can only handle one such scavenger spill
  // at a time.
  if (OpNum == 0 && MI->hasOneMemOperand()) {
    MachineMemOperand *MMO = *MI->memoperands_begin();
    if (MMO->getSize() == Size && !MMO->isVolatile()) {
      // Handle conversion of loads.
      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) &&
          !MI->getOperand(1).isFI()) {
        uint64_t Offset = 0;
        MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                  MachineMemOperand::MOStore);
        return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
          .addFrameIndex(FrameIndex).addImm(Offset).addImm(Size)
          .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
          .addMemOperand(FrameMMO).addMemOperand(MMO);
      }
      // Handle conversion of stores.
      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) &&
          !MI->getOperand(1).isFI()) {
        uint64_t Offset = 0;
        MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                  MachineMemOperand::MOLoad);
        return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
          .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
          .addImm(Size).addFrameIndex(FrameIndex).addImm(Offset)
          .addMemOperand(MMO).addMemOperand(FrameMMO);
      }
    }
  }

  return 0;
}

MachineInstr *
SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
                                        const SmallVectorImpl<unsigned> &Ops,
                                        MachineInstr* LoadMI) const {
  return 0;
}

bool
SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  switch (MI->getOpcode()) {
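To make the restriction described in the comment above concrete: if both the spill slot and the other memory location were frame indexes that ended up outside the 12-bit displacement range, the resulting MVC would need two scavenged base registers at once, something like the following (purely illustrative; this is exactly the code the backend cannot produce yet, which is why foldMemoryOperandImpl bails out via the isFI() checks):

    lay  %r1, 4096(%r15)      # scavenged base for the first out-of-range slot
    lay  %rN, 8192(%r15)      # would need a second scavenged base register
    mvc  0(8,%r1), 0(%rN)

The single-frame-index case only ever needs one such base register, which is the lay + mvc pattern that spill-01.py checks for.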
lib/Target/SystemZ/SystemZInstrInfo.h

@@ -111,6 +111,14 @@ public:
                                    unsigned DestReg, int FrameIdx,
                                    const TargetRegisterClass *RC,
                                    const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
  virtual MachineInstr *
    foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                          const SmallVectorImpl<unsigned> &Ops,
                          int FrameIndex) const;
  virtual MachineInstr *
    foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
                          const SmallVectorImpl<unsigned> &Ops,
                          MachineInstr* LoadMI) const;
  virtual bool
    expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE;
  virtual bool
lib/Target/SystemZ/SystemZInstrInfo.td

@@ -242,11 +242,8 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,

// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
  defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
  def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;

  def LG : UnaryRXY<"lg", 0xE304, load, GR64>;
  def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
  defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
  def LG : UnaryRXY<"lg", 0xE304, load, GR64>;

  // These instructions are split after register allocation, so we don't
  // want a custom inserter.

@@ -255,16 +252,16 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
                [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
  }
}
let canFoldAsLoad = 1 in {
  def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
  def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
}

// Register stores.
let SimpleBDXStore = 1 in {
  let isCodeGenOnly = 1 in {
    defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
    def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
  }

  def STG : StoreRXY<"stg", 0xE324, store, GR64>;
  def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
  let isCodeGenOnly = 1 in
    defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
  def STG : StoreRXY<"stg", 0xE324, store, GR64>;

  // These instructions are split after register allocation, so we don't
  // want a custom inserter.

@@ -273,6 +270,9 @@ let SimpleBDXStore = 1 in {
               [(store GR128:$src, bdxaddr20only128:$dst)]>;
  }
}
let isCodeGenOnly = 1 in
  def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;

// 8-bit immediate stores to 8-bit fields.
defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
test/CodeGen/SystemZ/Large/spill-01.py (new file, 40 lines)

@@ -0,0 +1,40 @@
# Test cases where MVC is used for spill slots that end up being out of range.
# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s

# There are 8 usable call-saved GPRs, two of which are needed for the base
# registers.  The first 160 bytes of the frame are needed for the ABI
# call frame, and a further 8 bytes are needed for the emergency spill slot.
# That means we will have at least one out-of-range slot if:
#
# count == (4096 - 168) / 8 + 6 + 1 == 498
#
# Add in some extra room and check both %r15+4096 (the first out-of-range slot)
# and %r15+4104.
#
# CHECK: f1:
# CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
# CHECK: mvc 0(8,[[REG]]), {{[0-9]+}}({{%r[0-9]+}})
# CHECK: brasl %r14, foo@PLT
# CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
# CHECK: mvc {{[0-9]+}}(8,{{%r[0-9]+}}), 8([[REG]])
# CHECK: br %r14
count = 500

print 'declare void @foo()'
print ''
print 'define void @f1(i64 *%base0, i64 *%base1) {'

for i in range(count):
  print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
  print ' %%val%d = load i64 *%%ptr%d' % (i, i)
  print ''

print ' call void @foo()'
print ''

for i in range(count):
  print ' store i64 %%val%d, i64 *%%ptr%d' % (i, i)

print ''
print ' ret void'
print '}'
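For reference, the arithmetic in the count formula above works out like this: MVC's unsigned 12-bit displacement covers byte offsets 0-4095 from %r15; subtracting the 160-byte ABI call frame and the 8-byte emergency spill slot leaves (4096 - 168) / 8 = 491 in-range 8-byte spill slots. Six more values can stay in the call-saved GPRs (8 usable minus the 2 needed as base registers), so the 491 + 6 + 1 = 498th value is the first that is forced into an out-of-range slot; count = 500 adds a little room on top of that.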
test/CodeGen/SystemZ/Large/spill-02.py (new file, 46 lines)

@@ -0,0 +1,46 @@
# Test cases where we spill from one frame index to another, both of which
# would be out of range of MVC.  At present we don't use MVC in this case.
# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s

# There are 8 usable call-saved GPRs.  The first 160 bytes of the frame
# are needed for the ABI call frame, and a further 8 bytes are needed
# for the emergency spill slot.  That means we will have at least one
# out-of-range slot if:
#
# count == (4096 - 168) / 8 + 8 + 1 == 500
#
# Add in some extra just to be sure.
#
# CHECK: f1:
# CHECK-NOT: mvc
# CHECK: br %r14
count = 510

print 'declare void @foo(i64 *%base0, i64 *%base1)'
print ''
print 'define void @f1() {'

for i in range(2):
  print ' %%alloc%d = alloca [%d x i64]' % (i, count / 2)
  print (' %%base%d = getelementptr [%d x i64] * %%alloc%d, i64 0, i64 0'
         % (i, count / 2, i))

print ' call void @foo(i64 *%base0, i64 *%base1)'
print ''

for i in range(count):
  print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
  print ' %%val%d = load i64 *%%ptr%d' % (i, i)
  print ''

print ' call void @foo(i64 *%base0, i64 *%base1)'
print ''

for i in range (count):
  print ' store i64 %%val%d, i64 *%%ptr%d' % (i, i)

print ''
print ' call void @foo(i64 *%base0, i64 *%base1)'
print ''
print ' ret void'
print '}'
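The threshold here follows the same arithmetic as in spill-01.py, except that the formula adds all 8 usable call-saved GPRs rather than 6, presumably because the base pointers here are local allocas rather than incoming arguments: (4096 - 168) / 8 + 8 + 1 = 491 + 8 + 1 = 500, and count = 510 adds some margin.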
test/CodeGen/SystemZ/spill-01.ll (new file, 383 lines)

@@ -0,0 +1,383 @@
; Test spilling using MVC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare void @foo()

@g0 = global i32 0
@g1 = global i32 1
@g2 = global i32 2
@g3 = global i32 3
@g4 = global i32 4
@g5 = global i32 5
@g6 = global i32 6
@g7 = global i32 7
@g8 = global i32 8
@g9 = global i32 9

@h0 = global i64 0
@h1 = global i64 1
@h2 = global i64 2
@h3 = global i64 3
@h4 = global i64 4
@h5 = global i64 5
@h6 = global i64 6
@h7 = global i64 7
@h8 = global i64 8
@h9 = global i64 9

; This function shouldn't spill anything
define void @f1(i32 *%ptr0) {
; CHECK: f1:
; CHECK: stmg
; CHECK: aghi %r15, -160
; CHECK-NOT: %r15
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: lmg
; CHECK: br %r14
  %ptr1 = getelementptr i32 *%ptr0, i32 2
  %ptr2 = getelementptr i32 *%ptr0, i32 4
  %ptr3 = getelementptr i32 *%ptr0, i32 6
  %ptr4 = getelementptr i32 *%ptr0, i32 8
  %ptr5 = getelementptr i32 *%ptr0, i32 10
  %ptr6 = getelementptr i32 *%ptr0, i32 12

  %val0 = load i32 *%ptr0
  %val1 = load i32 *%ptr1
  %val2 = load i32 *%ptr2
  %val3 = load i32 *%ptr3
  %val4 = load i32 *%ptr4
  %val5 = load i32 *%ptr5
  %val6 = load i32 *%ptr6

  call void @foo()

  store i32 %val0, i32 *%ptr0
  store i32 %val1, i32 *%ptr1
  store i32 %val2, i32 *%ptr2
  store i32 %val3, i32 *%ptr3
  store i32 %val4, i32 *%ptr4
  store i32 %val5, i32 *%ptr5
  store i32 %val6, i32 *%ptr6

  ret void
}

; Test a case where at least one i32 load and at least one i32 store
; need spills.
define void @f2(i32 *%ptr0) {
; CHECK: f2:
; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32 *%ptr0, i64 16

  %val0 = load i32 *%ptr0
  %val1 = load i32 *%ptr1
  %val2 = load i32 *%ptr2
  %val3 = load i32 *%ptr3
  %val4 = load i32 *%ptr4
  %val5 = load i32 *%ptr5
  %val6 = load i32 *%ptr6
  %val7 = load i32 *%ptr7
  %val8 = load i32 *%ptr8

  call void @foo()

  store i32 %val0, i32 *%ptr0
  store i32 %val1, i32 *%ptr1
  store i32 %val2, i32 *%ptr2
  store i32 %val3, i32 *%ptr3
  store i32 %val4, i32 *%ptr4
  store i32 %val5, i32 *%ptr5
  store i32 %val6, i32 *%ptr6
  store i32 %val7, i32 *%ptr7
  store i32 %val8, i32 *%ptr8

  ret void
}

; Test a case where at least one i64 load and at least one i64 store
; need spills.
define void @f3(i64 *%ptr0) {
; CHECK: f3:
; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i64 *%ptr0, i64 2
  %ptr2 = getelementptr i64 *%ptr0, i64 4
  %ptr3 = getelementptr i64 *%ptr0, i64 6
  %ptr4 = getelementptr i64 *%ptr0, i64 8
  %ptr5 = getelementptr i64 *%ptr0, i64 10
  %ptr6 = getelementptr i64 *%ptr0, i64 12
  %ptr7 = getelementptr i64 *%ptr0, i64 14
  %ptr8 = getelementptr i64 *%ptr0, i64 16

  %val0 = load i64 *%ptr0
  %val1 = load i64 *%ptr1
  %val2 = load i64 *%ptr2
  %val3 = load i64 *%ptr3
  %val4 = load i64 *%ptr4
  %val5 = load i64 *%ptr5
  %val6 = load i64 *%ptr6
  %val7 = load i64 *%ptr7
  %val8 = load i64 *%ptr8

  call void @foo()

  store i64 %val0, i64 *%ptr0
  store i64 %val1, i64 *%ptr1
  store i64 %val2, i64 *%ptr2
  store i64 %val3, i64 *%ptr3
  store i64 %val4, i64 *%ptr4
  store i64 %val5, i64 *%ptr5
  store i64 %val6, i64 *%ptr6
  store i64 %val7, i64 *%ptr7
  store i64 %val8, i64 *%ptr8

  ret void
}


; Test a case where at least one f32 load and at least one f32 store
; need spills.  The 8 call-saved FPRs could be used for 8 of the %vals
; (and are at the time of writing), but it would really be better to use
; MVC for all 10.
define void @f4(float *%ptr0) {
; CHECK: f4:
; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
; CHECK: br %r14
  %ptr1 = getelementptr float *%ptr0, i64 2
  %ptr2 = getelementptr float *%ptr0, i64 4
  %ptr3 = getelementptr float *%ptr0, i64 6
  %ptr4 = getelementptr float *%ptr0, i64 8
  %ptr5 = getelementptr float *%ptr0, i64 10
  %ptr6 = getelementptr float *%ptr0, i64 12
  %ptr7 = getelementptr float *%ptr0, i64 14
  %ptr8 = getelementptr float *%ptr0, i64 16
  %ptr9 = getelementptr float *%ptr0, i64 18

  %val0 = load float *%ptr0
  %val1 = load float *%ptr1
  %val2 = load float *%ptr2
  %val3 = load float *%ptr3
  %val4 = load float *%ptr4
  %val5 = load float *%ptr5
  %val6 = load float *%ptr6
  %val7 = load float *%ptr7
  %val8 = load float *%ptr8
  %val9 = load float *%ptr9

  call void @foo()

  store float %val0, float *%ptr0
  store float %val1, float *%ptr1
  store float %val2, float *%ptr2
  store float %val3, float *%ptr3
  store float %val4, float *%ptr4
  store float %val5, float *%ptr5
  store float %val6, float *%ptr6
  store float %val7, float *%ptr7
  store float %val8, float *%ptr8
  store float %val9, float *%ptr9

  ret void
}

; Similarly for f64.
define void @f5(double *%ptr0) {
; CHECK: f5:
; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr double *%ptr0, i64 2
  %ptr2 = getelementptr double *%ptr0, i64 4
  %ptr3 = getelementptr double *%ptr0, i64 6
  %ptr4 = getelementptr double *%ptr0, i64 8
  %ptr5 = getelementptr double *%ptr0, i64 10
  %ptr6 = getelementptr double *%ptr0, i64 12
  %ptr7 = getelementptr double *%ptr0, i64 14
  %ptr8 = getelementptr double *%ptr0, i64 16
  %ptr9 = getelementptr double *%ptr0, i64 18

  %val0 = load double *%ptr0
  %val1 = load double *%ptr1
  %val2 = load double *%ptr2
  %val3 = load double *%ptr3
  %val4 = load double *%ptr4
  %val5 = load double *%ptr5
  %val6 = load double *%ptr6
  %val7 = load double *%ptr7
  %val8 = load double *%ptr8
  %val9 = load double *%ptr9

  call void @foo()

  store double %val0, double *%ptr0
  store double %val1, double *%ptr1
  store double %val2, double *%ptr2
  store double %val3, double *%ptr3
  store double %val4, double *%ptr4
  store double %val5, double *%ptr5
  store double %val6, double *%ptr6
  store double %val7, double *%ptr7
  store double %val8, double *%ptr8
  store double %val9, double *%ptr9

  ret void
}

; Repeat f2 with atomic accesses.  We shouldn't use MVC here.
define void @f6(i32 *%ptr0) {
; CHECK: f6:
; CHECK-NOT: mvc
; CHECK: br %r14
  %ptr1 = getelementptr i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32 *%ptr0, i64 16

  %val0 = load atomic i32 *%ptr0 unordered, align 4
  %val1 = load atomic i32 *%ptr1 unordered, align 4
  %val2 = load atomic i32 *%ptr2 unordered, align 4
  %val3 = load atomic i32 *%ptr3 unordered, align 4
  %val4 = load atomic i32 *%ptr4 unordered, align 4
  %val5 = load atomic i32 *%ptr5 unordered, align 4
  %val6 = load atomic i32 *%ptr6 unordered, align 4
  %val7 = load atomic i32 *%ptr7 unordered, align 4
  %val8 = load atomic i32 *%ptr8 unordered, align 4

  call void @foo()

  store atomic i32 %val0, i32 *%ptr0 unordered, align 4
  store atomic i32 %val1, i32 *%ptr1 unordered, align 4
  store atomic i32 %val2, i32 *%ptr2 unordered, align 4
  store atomic i32 %val3, i32 *%ptr3 unordered, align 4
  store atomic i32 %val4, i32 *%ptr4 unordered, align 4
  store atomic i32 %val5, i32 *%ptr5 unordered, align 4
  store atomic i32 %val6, i32 *%ptr6 unordered, align 4
  store atomic i32 %val7, i32 *%ptr7 unordered, align 4
  store atomic i32 %val8, i32 *%ptr8 unordered, align 4

  ret void
}

; ...likewise volatile accesses.
define void @f7(i32 *%ptr0) {
; CHECK: f7:
; CHECK-NOT: mvc
; CHECK: br %r14
  %ptr1 = getelementptr i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32 *%ptr0, i64 16

  %val0 = load volatile i32 *%ptr0
  %val1 = load volatile i32 *%ptr1
  %val2 = load volatile i32 *%ptr2
  %val3 = load volatile i32 *%ptr3
  %val4 = load volatile i32 *%ptr4
  %val5 = load volatile i32 *%ptr5
  %val6 = load volatile i32 *%ptr6
  %val7 = load volatile i32 *%ptr7
  %val8 = load volatile i32 *%ptr8

  call void @foo()

  store volatile i32 %val0, i32 *%ptr0
  store volatile i32 %val1, i32 *%ptr1
  store volatile i32 %val2, i32 *%ptr2
  store volatile i32 %val3, i32 *%ptr3
  store volatile i32 %val4, i32 *%ptr4
  store volatile i32 %val5, i32 *%ptr5
  store volatile i32 %val6, i32 *%ptr6
  store volatile i32 %val7, i32 *%ptr7
  store volatile i32 %val8, i32 *%ptr8

  ret void
}

; Check that LRL and STRL are not converted.
define void @f8() {
; CHECK: f8:
; CHECK-NOT: mvc
; CHECK: br %r14
  %val0 = load i32 *@g0
  %val1 = load i32 *@g1
  %val2 = load i32 *@g2
  %val3 = load i32 *@g3
  %val4 = load i32 *@g4
  %val5 = load i32 *@g5
  %val6 = load i32 *@g6
  %val7 = load i32 *@g7
  %val8 = load i32 *@g8
  %val9 = load i32 *@g9

  call void @foo()

  store i32 %val0, i32 *@g0
  store i32 %val1, i32 *@g1
  store i32 %val2, i32 *@g2
  store i32 %val3, i32 *@g3
  store i32 %val4, i32 *@g4
  store i32 %val5, i32 *@g5
  store i32 %val6, i32 *@g6
  store i32 %val7, i32 *@g7
  store i32 %val8, i32 *@g8
  store i32 %val9, i32 *@g9

  ret void
}

; Likewise LGRL and STGRL.
define void @f9() {
; CHECK: f9:
; CHECK-NOT: mvc
; CHECK: br %r14
  %val0 = load i64 *@h0
  %val1 = load i64 *@h1
  %val2 = load i64 *@h2
  %val3 = load i64 *@h3
  %val4 = load i64 *@h4
  %val5 = load i64 *@h5
  %val6 = load i64 *@h6
  %val7 = load i64 *@h7
  %val8 = load i64 *@h8
  %val9 = load i64 *@h9

  call void @foo()

  store i64 %val0, i64 *@h0
  store i64 %val1, i64 *@h1
  store i64 %val2, i64 *@h2
  store i64 %val3, i64 *@h3
  store i64 %val4, i64 *@h4
  store i64 %val5, i64 *@h5
  store i64 %val6, i64 *@h6
  store i64 %val7, i64 *@h7
  store i64 %val8, i64 *@h8
  store i64 %val9, i64 *@h9

  ret void
}