diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 87a6d806d8a..4456d2ac2c9 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -569,6 +569,27 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return isWorthFolding(N); } +/// If there's a use of this ADDlow that's not itself a load/store then we'll +/// need to create a real ADD instruction from it anyway and there's no point in +/// folding it into the mem op. Theoretically, it shouldn't matter, but there's +/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding +/// leads to duplicated ADRP instructions. +static bool isWorthFoldingADDlow(SDValue N) { + for (auto Use : N->uses()) { + if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && + Use->getOpcode() != ISD::ATOMIC_LOAD && + Use->getOpcode() != ISD::ATOMIC_STORE) + return false; + + // ldar and stlr have much more restrictive addressing modes (just a + // register). + if (cast<MemSDNode>(Use)->getOrdering() > Monotonic) + return false; + } + + return true; +} + /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. 
@@ -582,7 +603,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, return true; } - if (N.getOpcode() == AArch64ISD::ADDlow) { + if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); Base = N.getOperand(0); diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll index 3377849f669..642d72aac47 100644 --- a/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -29,8 +29,7 @@ define void @fetch_and_nand(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw nand i128* %p, i128 %bits release store i128 %val, i128* @var, align 16 ret void @@ -45,8 +44,7 @@ define void @fetch_and_or(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw or i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -61,8 +59,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw add i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -77,8 +74,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: 
str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw sub i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -99,8 +95,7 @@ define void @fetch_and_min(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw min i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -121,8 +116,7 @@ define void @fetch_and_max(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw max i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -143,8 +137,7 @@ define void @fetch_and_umin(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw umin i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -165,8 +158,7 @@ define void @fetch_and_umax(i128* %p, i128 %bits) { ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] -; CHECK-DAG: str [[DEST_REGHI]] -; CHECK-DAG: str [[DEST_REGLO]] +; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]] %val = atomicrmw umax i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 8e41304ffad..44f2af1c5e7 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ 
b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -12,6 +12,7 @@ define void @test_simple(i32 %n, ...) { ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]] ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]] ; ... omit middle ones ... @@ -21,11 +22,10 @@ define void @test_simple(i32 %n, ...) { ; ... omit middle ones ... ; CHECK: stp q6, q7, [sp, # -; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var] +; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]] ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56 -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp @@ -50,6 +50,7 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) { ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]] ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]] ; ... omit middle ones ... @@ -59,11 +60,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) { ; ... omit middle ones ... ; CHECK: str q7, [sp, # -; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var] +; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]] ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40 -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp @@ -89,7 +89,8 @@ define void @test_nospare([8 x i64], [8 x float], ...) 
{ call void @llvm.va_start(i8* %addr) ; CHECK-NOT: sub sp, sp ; CHECK: mov [[STACK:x[0-9]+]], sp -; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var] +; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var +; CHECK: str [[STACK]], [x[[VAR]]] ret void } @@ -100,7 +101,8 @@ define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) { ; CHECK-LABEL: test_offsetstack: ; CHECK: sub sp, sp, #80 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96 -; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var] +; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var +; CHECK: str [[STACK_TOP]], [x[[VAR]]] %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index abb732ccf43..9fc9a5f0190 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -96,10 +96,8 @@ define [2 x i64] @return_struct() { %addr = bitcast %myStruct* @varstruct to [2 x i64]* %val = load [2 x i64]* %addr ret [2 x i64] %val -; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct] - ; Odd register regex below disallows x0 which we want to be live now. 
-; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct -; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8] +; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct +; CHECK: ldp x0, x1, [x[[VARSTRUCT]]] ; Make sure epilogue immediately follows ; CHECK-NEXT: ret } @@ -166,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) { ; CHECK-LABEL: check_i128_regalign store i128 %val1, i128* @var128 -; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128] -; CHECK-DAG: str x3, [{{x[0-9]+}}, #8] +; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK-DAG: stp x2, x3, [x[[VAR128]]] ret i64 %val2 ; CHECK: mov x0, x4 diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 51979f0bbf5..16157f83aac 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -62,8 +62,8 @@ define void @simple_rets() { %arr = call [2 x i64] @return_smallstruct() store [2 x i64] %arr, [2 x i64]* @varsmallstruct ; CHECK: bl return_smallstruct -; CHECK: str x1, [{{x[0-9]+}}, #8] -; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct] +; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct +; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]] call void @return_large_struct(%myStruct* sret @varstruct) ; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct @@ -128,12 +128,12 @@ define void @check_i128_align() { call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 42, i128 %val) -; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128] -; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] ; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16] -; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128] -; CHECK-NONEON: ldr 
[[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] ; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK: bl check_i128_stackalign