mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
[ARM64] Adds Cortex-A53 scheduling support for vector load/store post.
Patch by Dave Estes<cestes@codeaurora.org>! PR19761 http://reviews.llvm.org/D3829 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209176 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f5d9170e67
commit
e55ae5f498
@ -841,10 +841,73 @@ bool ARM64InstrInfo::optimizeCompareInstr(
|
||||
}
|
||||
|
||||
/// Return true if this instruction has a non-zero immediate
|
||||
bool ARM64InstrInfo::hasNonZeroImm(const MachineInstr *MI) const {
|
||||
if (MI->getOperand(3).isImm()) {
|
||||
unsigned val = MI->getOperand(3).getImm();
|
||||
return (val != 0);
|
||||
/// Reports whether MI is one of the opcodes listed in the switch below and
/// its operand 3 is an immediate with a non-zero value. Any other opcode, or a
/// listed opcode whose operand 3 is not an immediate, yields false.
///
/// \param MI the machine instruction to inspect.
/// \returns true only for a listed opcode with a non-zero immediate operand 3.
bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
// Opcodes not listed below fall through to the final "return false".
break;
|
||||
case ARM64::ADDSWrs:
|
||||
case ARM64::ADDSXrs:
|
||||
case ARM64::ADDWrs:
|
||||
case ARM64::ADDXrs:
|
||||
case ARM64::ANDSWrs:
|
||||
case ARM64::ANDSXrs:
|
||||
case ARM64::ANDWrs:
|
||||
case ARM64::ANDXrs:
|
||||
case ARM64::BICSWrs:
|
||||
case ARM64::BICSXrs:
|
||||
case ARM64::BICWrs:
|
||||
case ARM64::BICXrs:
|
||||
case ARM64::CRC32Brr:
|
||||
case ARM64::CRC32CBrr:
|
||||
case ARM64::CRC32CHrr:
|
||||
case ARM64::CRC32CWrr:
|
||||
case ARM64::CRC32CXrr:
|
||||
case ARM64::CRC32Hrr:
|
||||
case ARM64::CRC32Wrr:
|
||||
case ARM64::CRC32Xrr:
|
||||
case ARM64::EONWrs:
|
||||
case ARM64::EONXrs:
|
||||
case ARM64::EORWrs:
|
||||
case ARM64::EORXrs:
|
||||
case ARM64::ORNWrs:
|
||||
case ARM64::ORNXrs:
|
||||
case ARM64::ORRWrs:
|
||||
case ARM64::ORRXrs:
|
||||
case ARM64::SUBSWrs:
|
||||
case ARM64::SUBSXrs:
|
||||
case ARM64::SUBWrs:
|
||||
case ARM64::SUBXrs:
|
||||
// All listed opcodes share one check: operand 3 must be an immediate, and
// the result is whether that immediate is non-zero.
// NOTE(review): this indexes operand 3 unconditionally; presumably every
// listed opcode (including the CRC32*rr forms) has at least four operands --
// confirm against the instruction definitions.
if (MI->getOperand(3).isImm()) {
|
||||
unsigned val = MI->getOperand(3).getImm();
|
||||
return (val != 0);
|
||||
}
|
||||
// Operand 3 was not an immediate: leave the switch and return false.
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if this instruction has an extendable register operand whose extending value is non-zero
|
||||
bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case ARM64::ADDSWrx:
|
||||
case ARM64::ADDSXrx:
|
||||
case ARM64::ADDSXrx64:
|
||||
case ARM64::ADDWrx:
|
||||
case ARM64::ADDXrx:
|
||||
case ARM64::ADDXrx64:
|
||||
case ARM64::SUBSWrx:
|
||||
case ARM64::SUBSXrx:
|
||||
case ARM64::SUBSXrx64:
|
||||
case ARM64::SUBWrx:
|
||||
case ARM64::SUBXrx:
|
||||
case ARM64::SUBXrx64:
|
||||
if (MI->getOperand(3).isImm()) {
|
||||
unsigned val = MI->getOperand(3).getImm();
|
||||
return (val != 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -56,8 +56,13 @@ public:
|
||||
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const override;
|
||||
|
||||
/// \brief Is there a non-zero immediate?
|
||||
bool hasNonZeroImm(const MachineInstr *MI) const;
|
||||
/// Returns true if there is a shiftable register and the shift value
|
||||
/// is non-zero.
|
||||
bool hasShiftedReg(const MachineInstr *MI) const;
|
||||
|
||||
/// Returns true if there is an extendable register and the extending value
|
||||
/// is non-zero.
|
||||
bool hasExtendedReg(const MachineInstr *MI) const;
|
||||
|
||||
/// \brief Does this instruction set its full destination register to zero?
|
||||
bool isGPRZero(const MachineInstr *MI) const;
|
||||
|
@ -148,7 +148,9 @@ def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||
// operands are needed one cycle later if and only if they are to be
|
||||
// shifted. Otherwise, they too are needed two cycles later.
|
||||
// shifted. Otherwise, they too are needed two cycles later. This same
|
||||
// ReadAdvance applies to Extended registers as well, even though there is
|
||||
// a separate SchedPredicate for them.
|
||||
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
@ -167,7 +169,7 @@ def A53ReadISReg : SchedReadVariant<[
|
||||
def : SchedAlias<ReadISReg, A53ReadISReg>;
|
||||
|
||||
def A53ReadIEReg : SchedReadVariant<[
|
||||
SchedVar<RegShiftedPred, [A53ReadShifted]>,
|
||||
SchedVar<RegExtendedPred, [A53ReadShifted]>,
|
||||
SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
|
||||
def : SchedAlias<ReadIEReg, A53ReadIEReg>;
|
||||
|
||||
@ -196,64 +198,83 @@ def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
|
||||
//---
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
|
||||
//---
|
||||
// Vector Mul with Accumulate
|
||||
//---
|
||||
//def : InstRW<[WriteIM32, A53ReadIMA], (instregex "^M(ADD|SUB)W.*")>;
|
||||
//def : InstRW<[WriteIM64, A53ReadIMA], (instregex "^M(ADD|SUB)X.*")>;
|
||||
|
||||
//---
|
||||
// Vector Loads
|
||||
//---
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
// Post-indexed LD2 forms. The regexes require the _POST suffix so these WriteAdr entries match only the post-indexed opcodes and do not also match the plain LD2 opcodes that already have their own InstRW entries.
def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2dq)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
|
||||
def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)(_POST)?$")>;
|
||||
|
||||
def : InstRW<[A53WriteVLD1, A53WriteVLD1], (instregex "LDN?PS.*$")>;
|
||||
def : InstRW<[A53WriteVLD2, A53WriteVLD2], (instregex "LDN?PD.*$")>;
|
||||
def : InstRW<[A53WriteVLD4, A53WriteVLD4], (instregex "LDN?PQ.*$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
|
||||
def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Vector Stores
|
||||
//---
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)(_POST)?$")>;
|
||||
|
||||
def : InstRW<[A53WriteVST1], (instregex "STN?P(S|D).*$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "STN?PQ.*$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
|
||||
def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Floating Point MAC, DIV, SQRT
|
||||
|
@ -51,7 +51,10 @@ def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
||||
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
|
||||
|
||||
// Predicate for determining when a shiftable register is shifted.
|
||||
def RegShiftedPred : SchedPredicate<[{TII->hasNonZeroImm(MI)}]>;
|
||||
def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(MI)}]>;
|
||||
|
||||
// Predicate for determining when an extendable register is extended.
|
||||
def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(MI)}]>;
|
||||
|
||||
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
||||
// scaled. Subtargets can use this to dynamically select resources and
|
||||
|
@ -108,3 +108,18 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
|
||||
; Regression Test for Bug 19761
|
||||
; - [ARM64] Cortex-a53 schedule mode can't handle NEON post-increment load
|
||||
; - http://llvm.org/bugs/show_bug.cgi?id=19761
|
||||
;
|
||||
; Nothing explicit to check other than llc not crashing.
|
||||
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
|
||||
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
|
||||
%tmp = getelementptr i8* %A, i32 32
|
||||
store i8* %tmp, i8** %ptr
|
||||
ret { <16 x i8>, <16 x i8> } %ld2
|
||||
}
|
||||
|
||||
declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)
|
||||
|
Loading…
Reference in New Issue
Block a user