[AArch64] Add code to spill/fill Q register tuples such as QPair/QTriple/QQuad.

Q register tuples are spilled and filled with the ST1/LD1 multiple-structure instructions (ST1x2_16B, ST1x3_16B, ST1x4_16B and their LD1 counterparts); D register tuples reach the same path, as the in-tree comments below note. These instructions address memory through a base register only, with no immediate offset operand, so eliminateFrameIndex is taught to materialize a nonzero frame offset into a scratch register for them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198193 91177308-0d34-0410-b5e6-96231b3b80d8
commit afcdbf7400
parent 43ffcc571c
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -418,10 +418,8 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
-  } else {
-    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
-            RC->hasType(MVT::f128))
-           && "Expected integer or floating type for store");
+  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+             RC->hasType(MVT::f128)) {
     switch (RC->getSize()) {
     case 4: StoreOp = AArch64::LSFP32_STR; break;
     case 8: StoreOp = AArch64::LSFP64_STR; break;
@@ -429,6 +427,22 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
+  } else { // The spill of D tuples is implemented by Q tuples
+    if (RC == &AArch64::QPairRegClass)
+      StoreOp = AArch64::ST1x2_16B;
+    else if (RC == &AArch64::QTripleRegClass)
+      StoreOp = AArch64::ST1x3_16B;
+    else if (RC == &AArch64::QQuadRegClass)
+      StoreOp = AArch64::ST1x4_16B;
+    else
+      llvm_unreachable("Unknown reg class");
+
+    MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+    // Vector store has different operands from other store instructions.
+    NewMI.addFrameIndex(FrameIdx)
+         .addReg(SrcReg, getKillRegState(isKill))
+         .addMemOperand(MMO);
+    return;
   }
 
   MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
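A note on the in-hunk comment about operands: the scalar stores fall through to the shared builder call after this hunk and carry an immediate offset, while the new ST1x* path builds its operands inside the branch and has no offset at all. A minimal side-by-side sketch, assuming the shared scalar path keeps the usual source-register / frame-index / #0 chain (that code sits outside the hunk, so its exact shape here is an assumption):

    // Scalar spill (assumed shape of the shared code after this hunk):
    //   NewMI.addReg(SrcReg, getKillRegState(isKill))
    //        .addFrameIndex(FrameIdx)
    //        .addImm(0)                          // immediate offset operand
    //        .addMemOperand(MMO);
    //
    // Q-tuple spill (from this hunk): frame index first, no immediate offset.
    //   NewMI.addFrameIndex(FrameIdx)
    //        .addReg(SrcReg, getKillRegState(isKill))
    //        .addMemOperand(MMO);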
@@ -464,10 +478,8 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
-  } else {
-    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
-            || RC->hasType(MVT::f128))
-           && "Expected integer or floating type for store");
+  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+             RC->hasType(MVT::f128)) {
     switch (RC->getSize()) {
     case 4: LoadOp = AArch64::LSFP32_LDR; break;
     case 8: LoadOp = AArch64::LSFP64_LDR; break;
@@ -475,6 +487,21 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
+  } else { // The spill of D tuples is implemented by Q tuples
+    if (RC == &AArch64::QPairRegClass)
+      LoadOp = AArch64::LD1x2_16B;
+    else if (RC == &AArch64::QTripleRegClass)
+      LoadOp = AArch64::LD1x3_16B;
+    else if (RC == &AArch64::QQuadRegClass)
+      LoadOp = AArch64::LD1x4_16B;
+    else
+      llvm_unreachable("Unknown reg class");
+
+    MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+    // Vector load has different operands from other load instructions.
+    NewMI.addFrameIndex(FrameIdx)
+         .addMemOperand(MMO);
+    return;
   }
 
   MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
@@ -572,6 +599,21 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
     MinOffset = -0x40 * AccessScale;
     MaxOffset = 0x3f * AccessScale;
     return;
+  case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
+    AccessScale = 32;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
+    AccessScale = 48;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
+    AccessScale = 64;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
   }
 }
 
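The three new AccessScale values are just tuple size times register width: a Q register is 16 bytes, and the x2/x3/x4 forms move two, three, or four of them. A standalone sketch of that arithmetic in plain C++ (illustrative, not LLVM code):

    #include <cstdio>

    // Spill-slot widths implied by the LD1x{2,3,4}_16B / ST1x{2,3,4}_16B
    // opcodes: one 16-byte Q register per element of the tuple.
    int main() {
      const struct { const char *Tuple; int NumQRegs; } Classes[] = {
          {"QPair", 2}, {"QTriple", 3}, {"QQuad", 4}};
      for (const auto &C : Classes)
        std::printf("%-7s -> %d x 16 = %2d bytes (AccessScale)\n",
                    C.Tuple, C.NumQRegs, C.NumQRegs * 16);
      return 0;
    }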
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -76,6 +76,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+static bool hasFrameOffset(int opcode) {
+  return opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
+         opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
+         opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
+}
+
 void
 AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
                                          int SPAdj,
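hasFrameOffset singles out exactly the six new multiple-structure opcodes as having no offset operand; every other opcode keeps the old behaviour. A self-contained mimic for illustration, with plain enum values standing in for the AArch64 opcode enums (the stand-ins are hypothetical, not the real enum):

    #include <cassert>

    enum Opcode { LD1x2_16B, LD1x3_16B, LD1x4_16B,
                  ST1x2_16B, ST1x3_16B, ST1x4_16B,
                  LSFP32_STR /* a scalar store, for contrast */ };

    static bool hasFrameOffset(int opcode) {
      return opcode != LD1x2_16B && opcode != LD1x3_16B &&
             opcode != LD1x4_16B && opcode != ST1x2_16B &&
             opcode != ST1x3_16B && opcode != ST1x4_16B;
    }

    int main() {
      assert(!hasFrameOffset(ST1x2_16B)); // vector spill: base register only
      assert(hasFrameOffset(LSFP32_STR)); // scalar spill keeps its offset
      return 0;
    }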
@@ -110,8 +116,10 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
   int64_t Offset;
   Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
                                            IsCalleeSaveOp);
-
-  Offset += MI.getOperand(FIOperandNum + 1).getImm();
+  // A vector load/store instruction doesn't have an offset operand.
+  bool HasOffsetOp = hasFrameOffset(MI.getOpcode());
+  if (HasOffsetOp)
+    Offset += MI.getOperand(FIOperandNum + 1).getImm();
 
   // DBG_VALUE instructions have no real restrictions so they can be handled
   // easily.
@@ -124,7 +132,7 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
   const AArch64InstrInfo &TII =
     *static_cast<const AArch64InstrInfo*>(MF.getTarget().getInstrInfo());
   int MinOffset, MaxOffset, OffsetScale;
-  if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+  if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) {
     MinOffset = 0;
     MaxOffset = 0xfff;
     OffsetScale = 1;
@@ -133,10 +141,12 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
     TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
   }
 
-  // The frame lowering has told us a base and offset it thinks we should use to
-  // access this variable, but it's still up to us to make sure the values are
-  // legal for the instruction in question.
-  if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+  // There are two situations we don't use frame + offset directly in the
+  // instruction:
+  // (1) The offset can't really be scaled
+  // (2) Can't encode offset as it doesn't have an offset operand
+  if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) ||
+      (!HasOffsetOp && Offset != 0)) {
     unsigned BaseReg =
       MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
     emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
@@ -150,7 +160,8 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
     assert(Offset >= 0 && "Unexpected negative offset from SP");
 
   MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
-  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+  if (HasOffsetOp)
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
 }
 
 unsigned
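Taken together, the RegisterInfo changes mean that when one of the offset-less opcodes faces a nonzero frame offset, the offset is folded into a scratch GPR64 base register via emitRegUpdate and the instruction then addresses through that register alone. As a hedged illustration (register numbers chosen for the example, not taken from the patch), a QPair spill at frame offset 48 would come out roughly as:

    add x8, x29, #48             // scratch base = frame register + offset
    st1 {v0.16b, v1.16b}, [x8]   // ST1x2_16B: base register, no immediate

The new test below exercises this path by keeping each tuple live across a call to @foo(), which forces a spill before the call and a fill after it; the st1/ld1 .16b CHECK patterns match the Q-tuple form.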
new file: test/CodeGen/AArch64/neon-vector-list-spill.ll (134 lines)
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-vector-list-spill.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+; FIXME: We should not generate ld/st for such register spill/fill, because the
+; test case seems very simple and the register pressure is not high. If the
+; spill/fill algorithm is optimized, this test case may not be triggered. And
+; then we can delete it.
+define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DPairReg:
+; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
+  %res = extractelement <2 x i32> %vld.extract, i32 1
+  ret i32 %res
+}
+
+define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DTripleReg:
+; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
+  %res = extractelement <4 x i16> %vld.extract, i32 1
+  ret i16 %res
+}
+
+define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DQuadReg:
+; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
+  %res = extractelement <4 x i16> %vld.extract, i32 0
+  ret i16 %res
+}
+
+define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QPairReg:
+; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
+  %res = extractelement <4 x i32> %vld.extract, i32 1
+  ret i32 %res
+}
+
+define float @spill.QTripleReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QTripleReg:
+; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
+  %res = extractelement <4 x float> %vld3.extract, i32 1
+  ret float %res
+}
+
+define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QQuadReg:
+; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+entry:
+  %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4)
+  %cmp = icmp eq i32 %arg2, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo()
+  br label %if.end
+
+if.end:
+  %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0
+  %res = extractelement <16 x i8> %vld.extract, i32 1
+  ret i8 %res
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
+declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
+
+declare void @foo()