mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-26 07:34:06 +00:00
AArch64: don't be too greedy when folding :lo12: accesses into mem ops.
This frequently leads to cases like: "ldr xD, [xN, :lo12:var]; add xA, xN, :lo12:var; ldr xD, [xA, #8]", where the ADD would have been needed anyway, and the two distinct addressing modes can prevent the formation of an ldp. Because of how we handle ADRP (aggressively forming an ADRP/ADD pseudo-inst at ISel time), this pattern also results in duplicated ADRP instructions (one on its own to cover the ldr, and one combined with the add). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223172 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
97be10d98f
commit
1dad6937c5
@ -569,6 +569,27 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
|
||||
return isWorthFolding(N);
|
||||
}
|
||||
|
||||
/// If there's a use of this ADDlow that's not itself a load/store then we'll
|
||||
/// need to create a real ADD instruction from it anyway and there's no point in
|
||||
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
|
||||
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
|
||||
/// leads to duplicated ADRP instructions.
///
/// \param N the ADDlow value whose users are inspected.
/// \returns true only if every user of N is a LOAD, STORE, ATOMIC_LOAD or
/// ATOMIC_STORE whose ordering is no stronger than Monotonic; a node with no
/// users at all also yields true.
///
/// NOTE(review): only the user's opcode is checked, not which operand N
/// occupies, so a STORE that uses N as the stored value (rather than as the
/// address) is also accepted -- confirm whether that case needs handling.
|
||||
static bool isWorthFoldingADDlow(SDValue N) {
|
||||
// Folding only pays off when *every* user can absorb the :lo12: offset.
for (auto Use : N->uses()) {
|
||||
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
|
||||
Use->getOpcode() != ISD::ATOMIC_LOAD &&
|
||||
Use->getOpcode() != ISD::ATOMIC_STORE)
|
||||
// A non-memory user forces a real ADD to be emitted regardless.
return false;
|
||||
|
||||
// ldar and stlr have much more restrictive addressing modes (just a
|
||||
// register).
|
||||
// The cast is reached only for the four memory opcodes accepted above.
if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
|
||||
/// immediate" address. The "Size" argument is the size in bytes of the memory
|
||||
/// reference, which determines the scale.
|
||||
@ -582,7 +603,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (N.getOpcode() == AArch64ISD::ADDlow) {
|
||||
if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
|
||||
GlobalAddressSDNode *GAN =
|
||||
dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
|
||||
Base = N.getOperand(0);
|
||||
|
@ -29,8 +29,7 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw nand i128* %p, i128 %bits release
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -45,8 +44,7 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw or i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -61,8 +59,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw add i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -77,8 +74,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw sub i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -99,8 +95,7 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw min i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -121,8 +116,7 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw max i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -143,8 +137,7 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw umin i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
@ -165,8 +158,7 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
|
||||
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
|
||||
|
||||
; CHECK-DAG: str [[DEST_REGHI]]
|
||||
; CHECK-DAG: str [[DEST_REGLO]]
|
||||
; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
|
||||
%val = atomicrmw umax i128* %p, i128 %bits seq_cst
|
||||
store i128 %val, i128* @var, align 16
|
||||
ret void
|
||||
|
@ -12,6 +12,7 @@ define void @test_simple(i32 %n, ...) {
|
||||
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
|
||||
|
||||
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
|
||||
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
|
||||
; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
|
||||
; ... omit middle ones ...
|
||||
@ -21,11 +22,10 @@ define void @test_simple(i32 %n, ...) {
|
||||
; ... omit middle ones ...
|
||||
; CHECK: stp q6, q7, [sp, #
|
||||
|
||||
; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
|
||||
; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
|
||||
|
||||
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
|
||||
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
|
||||
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
|
||||
|
||||
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
|
||||
@ -50,6 +50,7 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
|
||||
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
|
||||
|
||||
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
|
||||
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
|
||||
; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
|
||||
; ... omit middle ones ...
|
||||
@ -59,11 +60,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
|
||||
; ... omit middle ones ...
|
||||
; CHECK: str q7, [sp, #
|
||||
|
||||
; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
|
||||
; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
|
||||
|
||||
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
|
||||
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
|
||||
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
|
||||
|
||||
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
|
||||
@ -89,7 +89,8 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
|
||||
call void @llvm.va_start(i8* %addr)
|
||||
; CHECK-NOT: sub sp, sp
|
||||
; CHECK: mov [[STACK:x[0-9]+]], sp
|
||||
; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
|
||||
; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
; CHECK: str [[STACK]], [x[[VAR]]]
|
||||
|
||||
ret void
|
||||
}
|
||||
@ -100,7 +101,8 @@ define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
|
||||
; CHECK-LABEL: test_offsetstack:
|
||||
; CHECK: sub sp, sp, #80
|
||||
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
|
||||
; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
|
||||
; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
||||
; CHECK: str [[STACK_TOP]], [x[[VAR]]]
|
||||
|
||||
%addr = bitcast %va_list* @var to i8*
|
||||
call void @llvm.va_start(i8* %addr)
|
||||
|
@ -96,10 +96,8 @@ define [2 x i64] @return_struct() {
|
||||
%addr = bitcast %myStruct* @varstruct to [2 x i64]*
|
||||
%val = load [2 x i64]* %addr
|
||||
ret [2 x i64] %val
|
||||
; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct]
|
||||
; Odd register regex below disallows x0 which we want to be live now.
|
||||
; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct
|
||||
; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8]
|
||||
; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
|
||||
; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
|
||||
; Make sure epilogue immediately follows
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
@ -166,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
|
||||
define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
|
||||
; CHECK-LABEL: check_i128_regalign
|
||||
store i128 %val1, i128* @var128
|
||||
; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128]
|
||||
; CHECK-DAG: str x3, [{{x[0-9]+}}, #8]
|
||||
; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
|
||||
; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
|
||||
|
||||
ret i64 %val2
|
||||
; CHECK: mov x0, x4
|
||||
|
@ -62,8 +62,8 @@ define void @simple_rets() {
|
||||
%arr = call [2 x i64] @return_smallstruct()
|
||||
store [2 x i64] %arr, [2 x i64]* @varsmallstruct
|
||||
; CHECK: bl return_smallstruct
|
||||
; CHECK: str x1, [{{x[0-9]+}}, #8]
|
||||
; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
|
||||
; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct
|
||||
; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]]
|
||||
|
||||
call void @return_large_struct(%myStruct* sret @varstruct)
|
||||
; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
|
||||
@ -128,12 +128,12 @@ define void @check_i128_align() {
|
||||
call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
|
||||
i32 4, i32 5, i32 6, i32 7,
|
||||
i32 42, i128 %val)
|
||||
; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
|
||||
; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
|
||||
; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
|
||||
; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
|
||||
; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
|
||||
|
||||
; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
|
||||
; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
|
||||
; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
|
||||
; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
|
||||
; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
|
||||
; CHECK: bl check_i128_stackalign
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user